1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Return index of given mode in mult and division cost tables. */
54 #define MODE_INDEX(mode) \
55 ((mode) == QImode ? 0 \
56 : (mode) == HImode ? 1 \
57 : (mode) == SImode ? 2 \
58 : (mode) == DImode ? 3 \
61 /* Processor costs (relative to an add) */
63 struct processor_costs size_cost = { /* costs for tunning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
108 struct processor_costs i386_cost = { /* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
152 struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
196 struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
240 struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
284 struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
328 struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of loading integer registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
372 struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
416 struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
459 const struct processor_costs *ix86_cost = &pentium_cost;
461 /* Processor feature/optimization bitmasks. */
462 #define m_386 (1<<PROCESSOR_I386)
463 #define m_486 (1<<PROCESSOR_I486)
464 #define m_PENT (1<<PROCESSOR_PENTIUM)
465 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466 #define m_K6 (1<<PROCESSOR_K6)
467 #define m_ATHLON (1<<PROCESSOR_ATHLON)
468 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
469 #define m_K8 (1<<PROCESSOR_K8)
470 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
472 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
473 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_zero_extend_with_and = m_486 | m_PENT;
475 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
476 const int x86_double_with_add = ~m_386;
477 const int x86_use_bit_test = m_386;
478 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
479 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
480 const int x86_3dnow_a = m_ATHLON_K8;
481 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
482 const int x86_branch_hints = m_PENT4;
483 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
484 const int x86_partial_reg_stall = m_PPRO;
485 const int x86_use_loop = m_K6;
486 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
487 const int x86_use_mov0 = m_K6;
488 const int x86_use_cltd = ~(m_PENT | m_K6);
489 const int x86_read_modify_write = ~m_PENT;
490 const int x86_read_modify = ~(m_PENT | m_PPRO);
491 const int x86_split_long_moves = m_PPRO;
492 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
493 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
494 const int x86_single_stringop = m_386 | m_PENT4;
495 const int x86_qimode_math = ~(0);
496 const int x86_promote_qi_regs = 0;
497 const int x86_himode_math = ~(m_PPRO);
498 const int x86_promote_hi_regs = m_PPRO;
499 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
500 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
501 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
502 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
503 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
504 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
505 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
506 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
507 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_decompose_lea = m_PENT4;
510 const int x86_shift1 = ~m_486;
511 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
512 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
513 /* Set for machines where the type and dependencies are resolved on SSE register
514 parts instead of whole registers, so we may maintain just lower part of
515 scalar values in proper format leaving the upper part undefined. */
516 const int x86_sse_partial_regs = m_ATHLON_K8;
517 /* Athlon optimizes partial-register FPS special case, thus avoiding the
518 need for extra instructions beforehand */
519 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
520 const int x86_sse_typeless_stores = m_ATHLON_K8;
521 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
522 const int x86_use_ffreep = m_ATHLON_K8;
523 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 /* In case the average insn count for single function invocation is
526 lower than this constant, emit fast (but longer) prologue and
528 #define FAST_PROLOGUE_INSN_COUNT 20
530 /* Set by prologue expander and used by epilogue expander to determine
532 static int use_fast_prologue_epilogue;
534 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
535 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
536 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
537 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
539 /* Array of the smallest class containing reg number REGNO, indexed by
540 REGNO. Used by REGNO_REG_CLASS in i386.h. */
542 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
545 AREG, DREG, CREG, BREG,
547 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
549 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
550 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
553 /* flags, fpsr, dirflag, frame */
554 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
555 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
557 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
561 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
565 /* The "default" register map used in 32bit mode. */
567 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
569 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
570 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
571 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
572 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
573 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
575 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
578 static int const x86_64_int_parameter_registers[6] =
580 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
581 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
584 static int const x86_64_int_return_registers[4] =
586 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
589 /* The "default" register map used in 64bit mode. */
590 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
592 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
593 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
594 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
595 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
596 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
597 8,9,10,11,12,13,14,15, /* extended integer registers */
598 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
601 /* Define the register numbers to be used in Dwarf debugging information.
602 The SVR4 reference port C compiler uses the following register numbers
603 in its Dwarf output code:
604 0 for %eax (gcc regno = 0)
605 1 for %ecx (gcc regno = 2)
606 2 for %edx (gcc regno = 1)
607 3 for %ebx (gcc regno = 3)
608 4 for %esp (gcc regno = 7)
609 5 for %ebp (gcc regno = 6)
610 6 for %esi (gcc regno = 4)
611 7 for %edi (gcc regno = 5)
612 The following three DWARF register numbers are never generated by
613 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
614 believes these numbers have these meanings.
615 8 for %eip (no gcc equivalent)
616 9 for %eflags (gcc regno = 17)
617 10 for %trapno (no gcc equivalent)
618 It is not at all clear how we should number the FP stack registers
619 for the x86 architecture. If the version of SDB on x86/svr4 were
620 a bit less brain dead with respect to floating-point then we would
621 have a precedent to follow with respect to DWARF register numbers
622 for x86 FP registers, but the SDB on x86/svr4 is so completely
623 broken with respect to FP registers that it is hardly worth thinking
624 of it as something to strive for compatibility with.
625 The version of x86/svr4 SDB I have at the moment does (partially)
626 seem to believe that DWARF register number 11 is associated with
627 the x86 register %st(0), but that's about all. Higher DWARF
628 register numbers don't seem to be associated with anything in
629 particular, and even for DWARF regno 11, SDB only seems to under-
630 stand that it should say that a variable lives in %st(0) (when
631 asked via an `=' command) if we said it was in DWARF regno 11,
632 but SDB still prints garbage when asked for the value of the
633 variable in question (via a `/' command).
634 (Also note that the labels SDB prints for various FP stack regs
635 when doing an `x' command are all wrong.)
636 Note that these problems generally don't affect the native SVR4
637 C compiler because it doesn't allow the use of -O with -g and
638 because when it is *not* optimizing, it allocates a memory
639 location for each floating-point variable, and the memory
640 location is what gets described in the DWARF AT_location
641 attribute for the variable in question.
642 Regardless of the severe mental illness of the x86/svr4 SDB, we
643 do something sensible here and we use the following DWARF
644 register numbers. Note that these are all stack-top-relative
646 11 for %st(0) (gcc regno = 8)
647 12 for %st(1) (gcc regno = 9)
648 13 for %st(2) (gcc regno = 10)
649 14 for %st(3) (gcc regno = 11)
650 15 for %st(4) (gcc regno = 12)
651 16 for %st(5) (gcc regno = 13)
652 17 for %st(6) (gcc regno = 14)
653 18 for %st(7) (gcc regno = 15)
655 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
657 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
658 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
659 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
660 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
661 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
663 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
666 /* Test and compare insns in i386.md store the information needed to
667 generate branch and scc insns here. */
669 rtx ix86_compare_op0 = NULL_RTX;
670 rtx ix86_compare_op1 = NULL_RTX;
672 /* The encoding characters for the four TLS models present in ELF. */
674 static char const tls_model_chars[] = " GLil";
676 #define MAX_386_STACK_LOCALS 3
677 /* Size of the register save area. */
678 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
680 /* Define the structure for the machine field in struct function. */
681 struct machine_function GTY(())
683 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
684 const char *some_ld_name;
685 int save_varrargs_registers;
686 int accesses_prev_frame;
689 #define ix86_stack_locals (cfun->machine->stack_locals)
690 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
692 /* Structure describing stack frame layout.
693 Stack grows downward:
699 saved frame pointer if frame_pointer_needed
700 <- HARD_FRAME_POINTER
706 > to_allocate <- FRAME_POINTER
718 int outgoing_arguments_size;
721 HOST_WIDE_INT to_allocate;
722 /* The offsets relative to ARG_POINTER. */
723 HOST_WIDE_INT frame_pointer_offset;
724 HOST_WIDE_INT hard_frame_pointer_offset;
725 HOST_WIDE_INT stack_pointer_offset;
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string;
733 enum cmodel ix86_cmodel;
735 const char *ix86_asm_string;
736 enum asm_dialect ix86_asm_dialect = ASM_ATT;
738 const char *ix86_tls_dialect_string;
739 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath;
744 /* Which cpu are we scheduling for. */
745 enum processor_type ix86_cpu;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
751 const char *ix86_arch_string; /* for -march=<xxx> */
752 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string;
757 /* true if sse prefetch instruction is not NOOP. */
758 int x86_prefetch_sse;
760 /* ix86_regparm_string as a number */
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost;
779 const char *ix86_branch_cost_string;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix[16];
786 static int internal_label_prefix_len;
788 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
789 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
790 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
791 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
793 static const char *get_some_local_dynamic_name PARAMS ((void));
794 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
795 static rtx maybe_get_pool_constant PARAMS ((rtx));
796 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
797 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
799 static rtx get_thread_pointer PARAMS ((void));
800 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
801 static rtx gen_push PARAMS ((rtx));
802 static int memory_address_length PARAMS ((rtx addr));
803 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
804 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
806 static void ix86_dump_ppro_packet PARAMS ((FILE *));
807 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
808 static struct machine_function * ix86_init_machine_status PARAMS ((void));
809 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
810 static int ix86_nsaved_regs PARAMS ((void));
811 static void ix86_emit_save_regs PARAMS ((void));
812 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
813 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
814 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
815 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
816 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
817 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
818 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
819 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
820 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
821 static int ix86_issue_rate PARAMS ((void));
822 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
823 static void ix86_sched_init PARAMS ((FILE *, int, int));
824 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
825 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
826 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
827 static int ia32_multipass_dfa_lookahead PARAMS ((void));
828 static void ix86_init_mmx_sse_builtins PARAMS ((void));
829 static rtx x86_this_parameter PARAMS ((tree));
830 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
833 HOST_WIDE_INT, tree));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
838 rtx base, index, disp;
842 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
843 static int ix86_address_cost PARAMS ((rtx));
844 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
847 static const char *ix86_strip_name_encoding PARAMS ((const char *))
850 struct builtin_description;
851 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
853 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
855 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
856 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
857 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
858 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
859 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
860 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
861 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
865 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
867 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
868 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
869 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
870 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
871 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
872 static int ix86_save_reg PARAMS ((unsigned int, int));
873 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
874 static int ix86_comp_type_attributes PARAMS ((tree, tree));
875 static int ix86_fntype_regparm PARAMS ((tree));
876 const struct attribute_spec ix86_attribute_table[];
877 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
878 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
879 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
880 static int ix86_value_regno PARAMS ((enum machine_mode));
881 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
882 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
883 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
884 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
886 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
887 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
890 /* Register class used for passing given 64bit part of the argument.
891 These represent classes as documented by the PS ABI, with the exception
892 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
893 use SF or DFmode move instead of DImode to avoid reformatting penalties.
895 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
896 whenever possible (upper half does contain padding).
898 enum x86_64_reg_class
901 X86_64_INTEGER_CLASS,
902 X86_64_INTEGERSI_CLASS,
911 static const char * const x86_64_reg_class_name[] =
912 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
914 #define MAX_CLASSES 4
915 static int classify_argument PARAMS ((enum machine_mode, tree,
916 enum x86_64_reg_class [MAX_CLASSES],
918 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
920 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
922 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
923 enum x86_64_reg_class));
925 /* Initialize the GCC target structure. */
926 #undef TARGET_ATTRIBUTE_TABLE
927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
928 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
929 # undef TARGET_MERGE_DECL_ATTRIBUTES
930 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
933 #undef TARGET_COMP_TYPE_ATTRIBUTES
934 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
936 #undef TARGET_INIT_BUILTINS
937 #define TARGET_INIT_BUILTINS ix86_init_builtins
939 #undef TARGET_EXPAND_BUILTIN
940 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
942 #undef TARGET_ASM_FUNCTION_EPILOGUE
943 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
945 #undef TARGET_ASM_OPEN_PAREN
946 #define TARGET_ASM_OPEN_PAREN ""
947 #undef TARGET_ASM_CLOSE_PAREN
948 #define TARGET_ASM_CLOSE_PAREN ""
950 #undef TARGET_ASM_ALIGNED_HI_OP
951 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952 #undef TARGET_ASM_ALIGNED_SI_OP
953 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
955 #undef TARGET_ASM_ALIGNED_DI_OP
956 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
959 #undef TARGET_ASM_UNALIGNED_HI_OP
960 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961 #undef TARGET_ASM_UNALIGNED_SI_OP
962 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963 #undef TARGET_ASM_UNALIGNED_DI_OP
964 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
966 #undef TARGET_SCHED_ADJUST_COST
967 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968 #undef TARGET_SCHED_ISSUE_RATE
969 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970 #undef TARGET_SCHED_VARIABLE_ISSUE
971 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972 #undef TARGET_SCHED_INIT
973 #define TARGET_SCHED_INIT ix86_sched_init
974 #undef TARGET_SCHED_REORDER
975 #define TARGET_SCHED_REORDER ix86_sched_reorder
976 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
977 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
983 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
984 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
987 #undef TARGET_HAVE_TLS
988 #define TARGET_HAVE_TLS true
990 #undef TARGET_CANNOT_FORCE_CONST_MEM
991 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
993 #undef TARGET_DELEGITIMIZE_ADDRESS
994 #define TARGET_DELEGITIMIZE_ADDRESS i386_simplify_dwarf_addr
996 #undef TARGET_MS_BITFIELD_LAYOUT_P
997 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
999 #undef TARGET_ASM_OUTPUT_MI_THUNK
1000 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1001 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1002 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1004 #undef TARGET_RTX_COSTS
1005 #define TARGET_RTX_COSTS ix86_rtx_costs
1006 #undef TARGET_ADDRESS_COST
1007 #define TARGET_ADDRESS_COST ix86_address_cost
1009 struct gcc_target targetm = TARGET_INITIALIZER;
1011 /* Sometimes certain combinations of command options do not make
1012 sense on a particular target machine. You can define a macro
1013 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1014 defined, is executed once just after all the command options have
1017 Don't use this macro to turn on various extra optimizations for
1018 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1024 /* Comes from final.c -- no real reason to change it. */
1025 #define MAX_CODE_ALIGN 16
1029 const struct processor_costs *cost; /* Processor costs */
1030 const int target_enable; /* Target flags to enable. */
1031 const int target_disable; /* Target flags to disable. */
1032 const int align_loop; /* Default alignments. */
1033 const int align_loop_max_skip;
1034 const int align_jump;
1035 const int align_jump_max_skip;
1036 const int align_func;
1038 const processor_target_table[PROCESSOR_max] =
1040 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1041 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1042 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1043 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1044 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1045 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1046 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1047 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1050 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1053 const char *const name; /* processor name or nickname. */
1054 const enum processor_type processor;
1055 const enum pta_flags
1060 PTA_PREFETCH_SSE = 8,
1066 const processor_alias_table[] =
1068 {"i386", PROCESSOR_I386, 0},
1069 {"i486", PROCESSOR_I486, 0},
1070 {"i586", PROCESSOR_PENTIUM, 0},
1071 {"pentium", PROCESSOR_PENTIUM, 0},
1072 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1073 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1074 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1075 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1076 {"i686", PROCESSOR_PENTIUMPRO, 0},
1077 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1078 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1079 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1080 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1081 PTA_MMX | PTA_PREFETCH_SSE},
1082 {"k6", PROCESSOR_K6, PTA_MMX},
1083 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1084 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1085 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1087 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1088 | PTA_3DNOW | PTA_3DNOW_A},
1089 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1090 | PTA_3DNOW_A | PTA_SSE},
1091 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1092 | PTA_3DNOW_A | PTA_SSE},
1093 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1094 | PTA_3DNOW_A | PTA_SSE},
1095 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1096 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1099 int const pta_size = ARRAY_SIZE (processor_alias_table);
1101 /* By default our XFmode is the 80-bit extended format. If we have
1102 use TFmode instead, it's also the 80-bit format, but with padding. */
1103 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1104 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1106 /* Set the default values for switches whose default depends on TARGET_64BIT
1107 in case they weren't overwritten by command line options. */
1110 if (flag_omit_frame_pointer == 2)
1111 flag_omit_frame_pointer = 1;
1112 if (flag_asynchronous_unwind_tables == 2)
1113 flag_asynchronous_unwind_tables = 1;
1114 if (flag_pcc_struct_return == 2)
1115 flag_pcc_struct_return = 0;
1119 if (flag_omit_frame_pointer == 2)
1120 flag_omit_frame_pointer = 0;
1121 if (flag_asynchronous_unwind_tables == 2)
1122 flag_asynchronous_unwind_tables = 0;
1123 if (flag_pcc_struct_return == 2)
1124 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1127 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1128 SUBTARGET_OVERRIDE_OPTIONS;
1131 if (!ix86_cpu_string && ix86_arch_string)
1132 ix86_cpu_string = ix86_arch_string;
1133 if (!ix86_cpu_string)
1134 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1135 if (!ix86_arch_string)
1136 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1138 if (ix86_cmodel_string != 0)
1140 if (!strcmp (ix86_cmodel_string, "small"))
1141 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1143 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1144 else if (!strcmp (ix86_cmodel_string, "32"))
1145 ix86_cmodel = CM_32;
1146 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1147 ix86_cmodel = CM_KERNEL;
1148 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1149 ix86_cmodel = CM_MEDIUM;
1150 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1151 ix86_cmodel = CM_LARGE;
1153 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1157 ix86_cmodel = CM_32;
1159 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1161 if (ix86_asm_string != 0)
1163 if (!strcmp (ix86_asm_string, "intel"))
1164 ix86_asm_dialect = ASM_INTEL;
1165 else if (!strcmp (ix86_asm_string, "att"))
1166 ix86_asm_dialect = ASM_ATT;
1168 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1170 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1171 error ("code model `%s' not supported in the %s bit mode",
1172 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1173 if (ix86_cmodel == CM_LARGE)
1174 sorry ("code model `large' not supported yet");
1175 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1176 sorry ("%i-bit mode not compiled in",
1177 (target_flags & MASK_64BIT) ? 64 : 32);
1179 for (i = 0; i < pta_size; i++)
1180 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1182 ix86_arch = processor_alias_table[i].processor;
1183 /* Default cpu tuning to the architecture. */
1184 ix86_cpu = ix86_arch;
1185 if (processor_alias_table[i].flags & PTA_MMX
1186 && !(target_flags_explicit & MASK_MMX))
1187 target_flags |= MASK_MMX;
1188 if (processor_alias_table[i].flags & PTA_3DNOW
1189 && !(target_flags_explicit & MASK_3DNOW))
1190 target_flags |= MASK_3DNOW;
1191 if (processor_alias_table[i].flags & PTA_3DNOW_A
1192 && !(target_flags_explicit & MASK_3DNOW_A))
1193 target_flags |= MASK_3DNOW_A;
1194 if (processor_alias_table[i].flags & PTA_SSE
1195 && !(target_flags_explicit & MASK_SSE))
1196 target_flags |= MASK_SSE;
1197 if (processor_alias_table[i].flags & PTA_SSE2
1198 && !(target_flags_explicit & MASK_SSE2))
1199 target_flags |= MASK_SSE2;
1200 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1201 x86_prefetch_sse = true;
1202 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1203 error ("CPU you selected does not support x86-64 instruction set");
1208 error ("bad value (%s) for -march= switch", ix86_arch_string);
1210 for (i = 0; i < pta_size; i++)
1211 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1213 ix86_cpu = processor_alias_table[i].processor;
1214 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1215 error ("CPU you selected does not support x86-64 instruction set");
1218 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1219 x86_prefetch_sse = true;
1221 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1224 ix86_cost = &size_cost;
1226 ix86_cost = processor_target_table[ix86_cpu].cost;
1227 target_flags |= processor_target_table[ix86_cpu].target_enable;
1228 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1230 /* Arrange to set up i386_stack_locals for all functions. */
1231 init_machine_status = ix86_init_machine_status;
1233 /* Validate -mregparm= value. */
1234 if (ix86_regparm_string)
1236 i = atoi (ix86_regparm_string);
1237 if (i < 0 || i > REGPARM_MAX)
1238 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1244 ix86_regparm = REGPARM_MAX;
1246 /* If the user has provided any of the -malign-* options,
1247 warn and use that value only if -falign-* is not set.
1248 Remove this code in GCC 3.2 or later. */
1249 if (ix86_align_loops_string)
1251 warning ("-malign-loops is obsolete, use -falign-loops");
1252 if (align_loops == 0)
1254 i = atoi (ix86_align_loops_string);
1255 if (i < 0 || i > MAX_CODE_ALIGN)
1256 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1258 align_loops = 1 << i;
1262 if (ix86_align_jumps_string)
1264 warning ("-malign-jumps is obsolete, use -falign-jumps");
1265 if (align_jumps == 0)
1267 i = atoi (ix86_align_jumps_string);
1268 if (i < 0 || i > MAX_CODE_ALIGN)
1269 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1271 align_jumps = 1 << i;
1275 if (ix86_align_funcs_string)
1277 warning ("-malign-functions is obsolete, use -falign-functions");
1278 if (align_functions == 0)
1280 i = atoi (ix86_align_funcs_string);
1281 if (i < 0 || i > MAX_CODE_ALIGN)
1282 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1284 align_functions = 1 << i;
1288 /* Default align_* from the processor table. */
1289 if (align_loops == 0)
1291 align_loops = processor_target_table[ix86_cpu].align_loop;
1292 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1294 if (align_jumps == 0)
1296 align_jumps = processor_target_table[ix86_cpu].align_jump;
1297 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1299 if (align_functions == 0)
1301 align_functions = processor_target_table[ix86_cpu].align_func;
1304 /* Validate -mpreferred-stack-boundary= value, or provide default.
1305 The default of 128 bits is for Pentium III's SSE __m128, but we
1306 don't want additional code to keep the stack aligned when
1307 optimizing for code size. */
1308 ix86_preferred_stack_boundary = (optimize_size
1309 ? TARGET_64BIT ? 128 : 32
1311 if (ix86_preferred_stack_boundary_string)
1313 i = atoi (ix86_preferred_stack_boundary_string);
1314 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1315 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1316 TARGET_64BIT ? 4 : 2);
1318 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1321 /* Validate -mbranch-cost= value, or provide default. */
1322 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1323 if (ix86_branch_cost_string)
1325 i = atoi (ix86_branch_cost_string);
1327 error ("-mbranch-cost=%d is not between 0 and 5", i);
1329 ix86_branch_cost = i;
1332 if (ix86_tls_dialect_string)
1334 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1335 ix86_tls_dialect = TLS_DIALECT_GNU;
1336 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1337 ix86_tls_dialect = TLS_DIALECT_SUN;
1339 error ("bad value (%s) for -mtls-dialect= switch",
1340 ix86_tls_dialect_string);
1343 /* Keep nonleaf frame pointers. */
1344 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1345 flag_omit_frame_pointer = 1;
1347 /* If we're doing fast math, we don't care about comparison order
1348 wrt NaNs. This lets us use a shorter comparison sequence. */
1349 if (flag_unsafe_math_optimizations)
1350 target_flags &= ~MASK_IEEE_FP;
1352 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1353 since the insns won't need emulation. */
1354 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1355 target_flags &= ~MASK_NO_FANCY_MATH_387;
1359 if (TARGET_ALIGN_DOUBLE)
1360 error ("-malign-double makes no sense in the 64bit mode");
1362 error ("-mrtd calling convention not supported in the 64bit mode");
1363 /* Enable by default the SSE and MMX builtins. */
1364 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1365 ix86_fpmath = FPMATH_SSE;
1368 ix86_fpmath = FPMATH_387;
1370 if (ix86_fpmath_string != 0)
1372 if (! strcmp (ix86_fpmath_string, "387"))
1373 ix86_fpmath = FPMATH_387;
1374 else if (! strcmp (ix86_fpmath_string, "sse"))
1378 warning ("SSE instruction set disabled, using 387 arithmetics");
1379 ix86_fpmath = FPMATH_387;
1382 ix86_fpmath = FPMATH_SSE;
1384 else if (! strcmp (ix86_fpmath_string, "387,sse")
1385 || ! strcmp (ix86_fpmath_string, "sse,387"))
1389 warning ("SSE instruction set disabled, using 387 arithmetics");
1390 ix86_fpmath = FPMATH_387;
1392 else if (!TARGET_80387)
1394 warning ("387 instruction set disabled, using SSE arithmetics");
1395 ix86_fpmath = FPMATH_SSE;
1398 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1401 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1404 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1408 target_flags |= MASK_MMX;
1409 x86_prefetch_sse = true;
1412 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1415 target_flags |= MASK_MMX;
1416 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1417 extensions it adds. */
1418 if (x86_3dnow_a & (1 << ix86_arch))
1419 target_flags |= MASK_3DNOW_A;
1421 if ((x86_accumulate_outgoing_args & CPUMASK)
1422 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1424 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1426 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1429 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1430 p = strchr (internal_label_prefix, 'X');
1431 internal_label_prefix_len = p - internal_label_prefix;
1437 optimization_options (level, size)
1439 int size ATTRIBUTE_UNUSED;
1441 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1442 make the problem with not enough registers even worse. */
1443 #ifdef INSN_SCHEDULING
1445 flag_schedule_insns = 0;
1448 /* The default values of these switches depend on the TARGET_64BIT
1449 that is not known at this moment. Mark these values with 2 and
1450 let user the to override these. In case there is no command line option
1451 specifying them, we will set the defaults in override_options. */
1453 flag_omit_frame_pointer = 2;
1454 flag_pcc_struct_return = 2;
1455 flag_asynchronous_unwind_tables = 2;
1458 /* Table of valid machine attributes. */
1459 const struct attribute_spec ix86_attribute_table[] =
1461 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1462 /* Stdcall attribute says callee is responsible for popping arguments
1463 if they are not variable. */
1464 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1465 /* Fastcall attribute says callee is responsible for popping arguments
1466 if they are not variable. */
1467 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1468 /* Cdecl attribute says the callee is a normal C declaration */
1469 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1470 /* Regparm attribute specifies how many integer arguments are to be
1471 passed in registers. */
1472 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1473 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1474 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1475 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1476 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1478 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1479 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1480 { NULL, 0, 0, false, false, false, NULL }
1483 /* If PIC, we cannot make sibling calls to global functions
1484 because the PLT requires %ebx live.
1485 If we are returning floats on the register stack, we cannot make
1486 sibling calls to functions that return floats. (The stack adjust
1487 instruction will wind up after the sibcall jump, and not be executed.) */
1490 ix86_function_ok_for_sibcall (decl, exp)
1494 /* If we are generating position-independent code, we cannot sibcall
1495 optimize any indirect call, or a direct call to a global function,
1496 as the PLT requires %ebx be live. */
1497 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1500 /* If we are returning floats on the 80387 register stack, we cannot
1501 make a sibcall from a function that doesn't return a float to a
1502 function that does; the necessary stack adjustment will not be
1504 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1505 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1508 /* If this call is indirect, we'll need to be able to use a call-clobbered
1509 register for the address of the target function. Make sure that all
1510 such registers are not used for passing parameters. */
1511 if (!decl && !TARGET_64BIT)
1513 int regparm = ix86_regparm;
1516 /* We're looking at the CALL_EXPR, we need the type of the function. */
1517 type = TREE_OPERAND (exp, 0); /* pointer expression */
1518 type = TREE_TYPE (type); /* pointer type */
1519 type = TREE_TYPE (type); /* function type */
1521 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1523 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1527 /* ??? Need to count the actual number of registers to be used,
1528 not the possible number of registers. Fix later. */
1533 /* Otherwise okay. That also includes certain types of indirect calls. */
1537 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1538 arguments as in struct attribute_spec.handler. */
1540 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1543 tree args ATTRIBUTE_UNUSED;
1544 int flags ATTRIBUTE_UNUSED;
1547 if (TREE_CODE (*node) != FUNCTION_TYPE
1548 && TREE_CODE (*node) != METHOD_TYPE
1549 && TREE_CODE (*node) != FIELD_DECL
1550 && TREE_CODE (*node) != TYPE_DECL)
1552 warning ("`%s' attribute only applies to functions",
1553 IDENTIFIER_POINTER (name));
1554 *no_add_attrs = true;
1558 if (is_attribute_p ("fastcall", name))
1560 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1562 error ("fastcall and stdcall attributes are not compatible");
1564 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1566 error ("fastcall and regparm attributes are not compatible");
1569 else if (is_attribute_p ("stdcall", name))
1571 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1573 error ("fastcall and stdcall attributes are not compatible");
1580 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1581 *no_add_attrs = true;
1587 /* Handle a "regparm" attribute;
1588 arguments as in struct attribute_spec.handler. */
1590 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1594 int flags ATTRIBUTE_UNUSED;
1597 if (TREE_CODE (*node) != FUNCTION_TYPE
1598 && TREE_CODE (*node) != METHOD_TYPE
1599 && TREE_CODE (*node) != FIELD_DECL
1600 && TREE_CODE (*node) != TYPE_DECL)
1602 warning ("`%s' attribute only applies to functions",
1603 IDENTIFIER_POINTER (name));
1604 *no_add_attrs = true;
1610 cst = TREE_VALUE (args);
1611 if (TREE_CODE (cst) != INTEGER_CST)
1613 warning ("`%s' attribute requires an integer constant argument",
1614 IDENTIFIER_POINTER (name));
1615 *no_add_attrs = true;
1617 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1619 warning ("argument to `%s' attribute larger than %d",
1620 IDENTIFIER_POINTER (name), REGPARM_MAX);
1621 *no_add_attrs = true;
1624 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1626 error ("fastcall and regparm attributes are not compatible");
1633 /* Return 0 if the attributes for two types are incompatible, 1 if they
1634 are compatible, and 2 if they are nearly compatible (which causes a
1635 warning to be generated). */
1638 ix86_comp_type_attributes (type1, type2)
1642 /* Check for mismatch of non-default calling convention. */
1643 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1645 if (TREE_CODE (type1) != FUNCTION_TYPE)
1648 /* Check for mismatched fastcall types */
1649 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1650 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1653 /* Check for mismatched return types (cdecl vs stdcall). */
1654 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1655 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1660 /* Return the regparm value for a fuctio with the indicated TYPE. */
1663 ix86_fntype_regparm (type)
1668 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1670 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1672 return ix86_regparm;
1675 /* Value is the number of bytes of arguments automatically
1676 popped when returning from a subroutine call.
1677 FUNDECL is the declaration node of the function (as a tree),
1678 FUNTYPE is the data type of the function (as a tree),
1679 or for a library call it is an identifier node for the subroutine name.
1680 SIZE is the number of bytes of arguments passed on the stack.
1682 On the 80386, the RTD insn may be used to pop them if the number
1683 of args is fixed, but if the number is variable then the caller
1684 must pop them all. RTD can't be used for library calls now
1685 because the library is compiled with the Unix compiler.
1686 Use of RTD is a selectable option, since it is incompatible with
1687 standard Unix calling sequences. If the option is not selected,
1688 the caller must always pop the args.
1690 The attribute stdcall is equivalent to RTD on a per module basis. */
1693 ix86_return_pops_args (fundecl, funtype, size)
1698 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1700 /* Cdecl functions override -mrtd, and never pop the stack. */
1701 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1703 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1704 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1705 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1709 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1710 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1711 == void_type_node)))
1715 /* Lose any fake structure return argument if it is passed on the stack. */
1716 if (aggregate_value_p (TREE_TYPE (funtype))
1719 int nregs = ix86_fntype_regparm (funtype);
1722 return GET_MODE_SIZE (Pmode);
1728 /* Argument support functions. */
1730 /* Return true when register may be used to pass function parameters. */
1732 ix86_function_arg_regno_p (regno)
1737 return (regno < REGPARM_MAX
1738 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1739 if (SSE_REGNO_P (regno) && TARGET_SSE)
1741 /* RAX is used as hidden argument to va_arg functions. */
1744 for (i = 0; i < REGPARM_MAX; i++)
1745 if (regno == x86_64_int_parameter_registers[i])
1750 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1751 for a call to a function whose data type is FNTYPE.
1752 For a library call, FNTYPE is 0. */
1755 init_cumulative_args (cum, fntype, libname)
1756 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1757 tree fntype; /* tree ptr for function decl */
1758 rtx libname; /* SYMBOL_REF of library name or 0 */
1760 static CUMULATIVE_ARGS zero_cum;
1761 tree param, next_param;
1763 if (TARGET_DEBUG_ARG)
1765 fprintf (stderr, "\ninit_cumulative_args (");
1767 fprintf (stderr, "fntype code = %s, ret code = %s",
1768 tree_code_name[(int) TREE_CODE (fntype)],
1769 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1771 fprintf (stderr, "no fntype");
1774 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1779 /* Set up the number of registers to use for passing arguments. */
1780 cum->nregs = ix86_regparm;
1781 cum->sse_nregs = SSE_REGPARM_MAX;
1782 if (fntype && !TARGET_64BIT)
1784 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1787 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1789 cum->maybe_vaarg = false;
1791 /* Use ecx and edx registers if function has fastcall attribute */
1792 if (fntype && !TARGET_64BIT)
1794 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1802 /* Determine if this function has variable arguments. This is
1803 indicated by the last argument being 'void_type_mode' if there
1804 are no variable arguments. If there are variable arguments, then
1805 we won't pass anything in registers */
1809 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1810 param != 0; param = next_param)
1812 next_param = TREE_CHAIN (param);
1813 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1820 cum->maybe_vaarg = true;
1824 if ((!fntype && !libname)
1825 || (fntype && !TYPE_ARG_TYPES (fntype)))
1826 cum->maybe_vaarg = 1;
1828 if (TARGET_DEBUG_ARG)
1829 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1834 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1835 of this code is to classify each 8bytes of incoming argument by the register
1836 class and assign registers accordingly. */
1838 /* Return the union class of CLASS1 and CLASS2.
1839 See the x86-64 PS ABI for details. */
1841 static enum x86_64_reg_class
1842 merge_classes (class1, class2)
1843 enum x86_64_reg_class class1, class2;
1845 /* Rule #1: If both classes are equal, this is the resulting class. */
1846 if (class1 == class2)
1849 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1851 if (class1 == X86_64_NO_CLASS)
1853 if (class2 == X86_64_NO_CLASS)
1856 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1857 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1858 return X86_64_MEMORY_CLASS;
1860 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1861 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1862 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1863 return X86_64_INTEGERSI_CLASS;
1864 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1865 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1866 return X86_64_INTEGER_CLASS;
1868 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1869 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1870 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1871 return X86_64_MEMORY_CLASS;
1873 /* Rule #6: Otherwise class SSE is used. */
1874 return X86_64_SSE_CLASS;
1877 /* Classify the argument of type TYPE and mode MODE.
1878 CLASSES will be filled by the register class used to pass each word
1879 of the operand. The number of words is returned. In case the parameter
1880 should be passed in memory, 0 is returned. As a special case for zero
1881 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1883 BIT_OFFSET is used internally for handling records and specifies offset
1884 of the offset in bits modulo 256 to avoid overflow cases.
1886 See the x86-64 PS ABI for details.
1890 classify_argument (mode, type, classes, bit_offset)
1891 enum machine_mode mode;
1893 enum x86_64_reg_class classes[MAX_CLASSES];
1897 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1898 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1900 /* Variable sized entities are always passed/returned in memory. */
1904 if (type && AGGREGATE_TYPE_P (type))
1908 enum x86_64_reg_class subclasses[MAX_CLASSES];
1910 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1914 for (i = 0; i < words; i++)
1915 classes[i] = X86_64_NO_CLASS;
1917 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1918 signalize memory class, so handle it as special case. */
1921 classes[0] = X86_64_NO_CLASS;
1925 /* Classify each field of record and merge classes. */
1926 if (TREE_CODE (type) == RECORD_TYPE)
1928 /* For classes first merge in the field of the subclasses. */
1929 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1931 tree bases = TYPE_BINFO_BASETYPES (type);
1932 int n_bases = TREE_VEC_LENGTH (bases);
1935 for (i = 0; i < n_bases; ++i)
1937 tree binfo = TREE_VEC_ELT (bases, i);
1939 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1940 tree type = BINFO_TYPE (binfo);
1942 num = classify_argument (TYPE_MODE (type),
1944 (offset + bit_offset) % 256);
1947 for (i = 0; i < num; i++)
1949 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1951 merge_classes (subclasses[i], classes[i + pos]);
1955 /* And now merge the fields of structure. */
1956 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1958 if (TREE_CODE (field) == FIELD_DECL)
1962 /* Bitfields are always classified as integer. Handle them
1963 early, since later code would consider them to be
1964 misaligned integers. */
1965 if (DECL_BIT_FIELD (field))
1967 for (i = int_bit_position (field) / 8 / 8;
1968 i < (int_bit_position (field)
1969 + tree_low_cst (DECL_SIZE (field), 0)
1972 merge_classes (X86_64_INTEGER_CLASS,
1977 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1978 TREE_TYPE (field), subclasses,
1979 (int_bit_position (field)
1980 + bit_offset) % 256);
1983 for (i = 0; i < num; i++)
1986 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1988 merge_classes (subclasses[i], classes[i + pos]);
1994 /* Arrays are handled as small records. */
1995 else if (TREE_CODE (type) == ARRAY_TYPE)
1998 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1999 TREE_TYPE (type), subclasses, bit_offset);
2003 /* The partial classes are now full classes. */
2004 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2005 subclasses[0] = X86_64_SSE_CLASS;
2006 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2007 subclasses[0] = X86_64_INTEGER_CLASS;
2009 for (i = 0; i < words; i++)
2010 classes[i] = subclasses[i % num];
2012 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2013 else if (TREE_CODE (type) == UNION_TYPE
2014 || TREE_CODE (type) == QUAL_UNION_TYPE)
2016 /* For classes first merge in the field of the subclasses. */
2017 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2019 tree bases = TYPE_BINFO_BASETYPES (type);
2020 int n_bases = TREE_VEC_LENGTH (bases);
2023 for (i = 0; i < n_bases; ++i)
2025 tree binfo = TREE_VEC_ELT (bases, i);
2027 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2028 tree type = BINFO_TYPE (binfo);
2030 num = classify_argument (TYPE_MODE (type),
2032 (offset + (bit_offset % 64)) % 256);
2035 for (i = 0; i < num; i++)
2037 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2039 merge_classes (subclasses[i], classes[i + pos]);
2043 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2045 if (TREE_CODE (field) == FIELD_DECL)
2048 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2049 TREE_TYPE (field), subclasses,
2053 for (i = 0; i < num; i++)
2054 classes[i] = merge_classes (subclasses[i], classes[i]);
2061 /* Final merger cleanup. */
2062 for (i = 0; i < words; i++)
2064 /* If one class is MEMORY, everything should be passed in
2066 if (classes[i] == X86_64_MEMORY_CLASS)
2069 /* The X86_64_SSEUP_CLASS should be always preceded by
2070 X86_64_SSE_CLASS. */
2071 if (classes[i] == X86_64_SSEUP_CLASS
2072 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2073 classes[i] = X86_64_SSE_CLASS;
2075 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2076 if (classes[i] == X86_64_X87UP_CLASS
2077 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2078 classes[i] = X86_64_SSE_CLASS;
2083 /* Compute alignment needed. We align all types to natural boundaries with
2084 exception of XFmode that is aligned to 64bits. */
2085 if (mode != VOIDmode && mode != BLKmode)
2087 int mode_alignment = GET_MODE_BITSIZE (mode);
2090 mode_alignment = 128;
2091 else if (mode == XCmode)
2092 mode_alignment = 256;
2093 /* Misaligned fields are always returned in memory. */
2094 if (bit_offset % mode_alignment)
2098 /* Classification of atomic types. */
2108 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2109 classes[0] = X86_64_INTEGERSI_CLASS;
2111 classes[0] = X86_64_INTEGER_CLASS;
2115 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2118 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2119 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2122 if (!(bit_offset % 64))
2123 classes[0] = X86_64_SSESF_CLASS;
2125 classes[0] = X86_64_SSE_CLASS;
2128 classes[0] = X86_64_SSEDF_CLASS;
2131 classes[0] = X86_64_X87_CLASS;
2132 classes[1] = X86_64_X87UP_CLASS;
2135 classes[0] = X86_64_X87_CLASS;
2136 classes[1] = X86_64_X87UP_CLASS;
2137 classes[2] = X86_64_X87_CLASS;
2138 classes[3] = X86_64_X87UP_CLASS;
2141 classes[0] = X86_64_SSEDF_CLASS;
2142 classes[1] = X86_64_SSEDF_CLASS;
2145 classes[0] = X86_64_SSE_CLASS;
2153 classes[0] = X86_64_SSE_CLASS;
2154 classes[1] = X86_64_SSEUP_CLASS;
2169 /* Examine the argument and return set number of register required in each
2170 class. Return 0 iff parameter should be passed in memory. */
2172 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2173 enum machine_mode mode;
2175 int *int_nregs, *sse_nregs;
2178 enum x86_64_reg_class class[MAX_CLASSES];
2179 int n = classify_argument (mode, type, class, 0);
2185 for (n--; n >= 0; n--)
2188 case X86_64_INTEGER_CLASS:
2189 case X86_64_INTEGERSI_CLASS:
2192 case X86_64_SSE_CLASS:
2193 case X86_64_SSESF_CLASS:
2194 case X86_64_SSEDF_CLASS:
2197 case X86_64_NO_CLASS:
2198 case X86_64_SSEUP_CLASS:
2200 case X86_64_X87_CLASS:
2201 case X86_64_X87UP_CLASS:
2205 case X86_64_MEMORY_CLASS:
2210 /* Construct container for the argument used by GCC interface. See
2211 FUNCTION_ARG for the detailed description. */
2213 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2214 enum machine_mode mode;
2217 int nintregs, nsseregs;
2221 enum machine_mode tmpmode;
2223 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2224 enum x86_64_reg_class class[MAX_CLASSES];
2228 int needed_sseregs, needed_intregs;
2229 rtx exp[MAX_CLASSES];
2232 n = classify_argument (mode, type, class, 0);
2233 if (TARGET_DEBUG_ARG)
2236 fprintf (stderr, "Memory class\n");
2239 fprintf (stderr, "Classes:");
2240 for (i = 0; i < n; i++)
2242 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2244 fprintf (stderr, "\n");
2249 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2251 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2254 /* First construct simple cases. Avoid SCmode, since we want to use
2255 single register to pass this type. */
2256 if (n == 1 && mode != SCmode)
2259 case X86_64_INTEGER_CLASS:
2260 case X86_64_INTEGERSI_CLASS:
2261 return gen_rtx_REG (mode, intreg[0]);
2262 case X86_64_SSE_CLASS:
2263 case X86_64_SSESF_CLASS:
2264 case X86_64_SSEDF_CLASS:
2265 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2266 case X86_64_X87_CLASS:
2267 return gen_rtx_REG (mode, FIRST_STACK_REG);
2268 case X86_64_NO_CLASS:
2269 /* Zero sized array, struct or class. */
2274 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2275 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2277 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2278 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2279 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2280 && class[1] == X86_64_INTEGER_CLASS
2281 && (mode == CDImode || mode == TImode)
2282 && intreg[0] + 1 == intreg[1])
2283 return gen_rtx_REG (mode, intreg[0]);
2285 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2286 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2287 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2289 /* Otherwise figure out the entries of the PARALLEL. */
2290 for (i = 0; i < n; i++)
2294 case X86_64_NO_CLASS:
2296 case X86_64_INTEGER_CLASS:
2297 case X86_64_INTEGERSI_CLASS:
2298 /* Merge TImodes on aligned occasions here too. */
2299 if (i * 8 + 8 > bytes)
2300 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2301 else if (class[i] == X86_64_INTEGERSI_CLASS)
2305 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2306 if (tmpmode == BLKmode)
2308 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2309 gen_rtx_REG (tmpmode, *intreg),
2313 case X86_64_SSESF_CLASS:
2314 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2315 gen_rtx_REG (SFmode,
2316 SSE_REGNO (sse_regno)),
2320 case X86_64_SSEDF_CLASS:
2321 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2322 gen_rtx_REG (DFmode,
2323 SSE_REGNO (sse_regno)),
2327 case X86_64_SSE_CLASS:
2328 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2332 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2333 gen_rtx_REG (tmpmode,
2334 SSE_REGNO (sse_regno)),
2336 if (tmpmode == TImode)
2344 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2345 for (i = 0; i < nexps; i++)
2346 XVECEXP (ret, 0, i) = exp [i];
2350 /* Update the data in CUM to advance over an argument
2351 of mode MODE and data type TYPE.
2352 (TYPE is null for libcalls where that information may not be available.) */
2355 function_arg_advance (cum, mode, type, named)
2356 CUMULATIVE_ARGS *cum; /* current arg information */
2357 enum machine_mode mode; /* current arg mode */
2358 tree type; /* type of the argument or 0 if lib support */
2359 int named; /* whether or not the argument was named */
2362 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2363 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2365 if (TARGET_DEBUG_ARG)
2367 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2368 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2371 int int_nregs, sse_nregs;
2372 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2373 cum->words += words;
2374 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2376 cum->nregs -= int_nregs;
2377 cum->sse_nregs -= sse_nregs;
2378 cum->regno += int_nregs;
2379 cum->sse_regno += sse_nregs;
2382 cum->words += words;
2386 if (TARGET_SSE && mode == TImode)
2388 cum->sse_words += words;
2389 cum->sse_nregs -= 1;
2390 cum->sse_regno += 1;
2391 if (cum->sse_nregs <= 0)
2399 cum->words += words;
2400 cum->nregs -= words;
2401 cum->regno += words;
2403 if (cum->nregs <= 0)
2413 /* Define where to put the arguments to a function.
2414 Value is zero to push the argument on the stack,
2415 or a hard register in which to store the argument.
2417 MODE is the argument's machine mode.
2418 TYPE is the data type of the argument (as a tree).
2419 This is null for libcalls where that information may
2421 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2422 the preceding args and about the function being called.
2423 NAMED is nonzero if this argument is a named parameter
2424 (otherwise it is an extra parameter matching an ellipsis). */
2427 function_arg (cum, mode, type, named)
2428 CUMULATIVE_ARGS *cum; /* current arg information */
2429 enum machine_mode mode; /* current arg mode */
2430 tree type; /* type of the argument or 0 if lib support */
2431 int named; /* != 0 for normal args, == 0 for ... args */
2435 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2436 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2438 /* Handle a hidden AL argument containing number of registers for varargs
2439 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2441 if (mode == VOIDmode)
2444 return GEN_INT (cum->maybe_vaarg
2445 ? (cum->sse_nregs < 0
2453 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2454 &x86_64_int_parameter_registers [cum->regno],
2459 /* For now, pass fp/complex values on the stack. */
2468 if (words <= cum->nregs)
2470 int regno = cum->regno;
2472 /* Fastcall allocates the first two DWORD (SImode) or
2473 smaller arguments to ECX and EDX. */
2476 if (mode == BLKmode || mode == DImode)
2479 /* ECX not EAX is the first allocated register. */
2483 ret = gen_rtx_REG (mode, regno);
2488 ret = gen_rtx_REG (mode, cum->sse_regno);
2492 if (TARGET_DEBUG_ARG)
2495 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2496 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2499 print_simple_rtl (stderr, ret);
2501 fprintf (stderr, ", stack");
2503 fprintf (stderr, " )\n");
2509 /* A C expression that indicates when an argument must be passed by
2510 reference. If nonzero for an argument, a copy of that argument is
2511 made in memory and a pointer to the argument is passed instead of
2512 the argument itself. The pointer is passed in whatever way is
2513 appropriate for passing a pointer to that type. */
2516 function_arg_pass_by_reference (cum, mode, type, named)
2517 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2518 enum machine_mode mode ATTRIBUTE_UNUSED;
2520 int named ATTRIBUTE_UNUSED;
2525 if (type && int_size_in_bytes (type) == -1)
2527 if (TARGET_DEBUG_ARG)
2528 fprintf (stderr, "function_arg_pass_by_reference\n");
2535 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2539 ix86_function_arg_boundary (mode, type)
2540 enum machine_mode mode;
2545 return PARM_BOUNDARY;
2547 align = TYPE_ALIGN (type);
2549 align = GET_MODE_ALIGNMENT (mode);
2550 if (align < PARM_BOUNDARY)
2551 align = PARM_BOUNDARY;
2557 /* Return true if N is a possible register number of function value. */
2559 ix86_function_value_regno_p (regno)
2564 return ((regno) == 0
2565 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2566 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2568 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2569 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2570 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2573 /* Define how to find the value returned by a function.
2574 VALTYPE is the data type of the value (as a tree).
2575 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2576 otherwise, FUNC is 0. */
2578 ix86_function_value (valtype)
2583 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2584 REGPARM_MAX, SSE_REGPARM_MAX,
2585 x86_64_int_return_registers, 0);
2586 /* For zero sized structures, construct_container return NULL, but we need
2587 to keep rest of compiler happy by returning meaningful value. */
2589 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2593 return gen_rtx_REG (TYPE_MODE (valtype),
2594 ix86_value_regno (TYPE_MODE (valtype)));
2597 /* Return false iff type is returned in memory. */
2599 ix86_return_in_memory (type)
2602 int needed_intregs, needed_sseregs;
2605 return !examine_argument (TYPE_MODE (type), type, 1,
2606 &needed_intregs, &needed_sseregs);
2610 if (TYPE_MODE (type) == BLKmode
2611 || (VECTOR_MODE_P (TYPE_MODE (type))
2612 && int_size_in_bytes (type) == 8)
2613 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2614 && TYPE_MODE (type) != TFmode
2615 && !VECTOR_MODE_P (TYPE_MODE (type))))
2621 /* Define how to find the value returned by a library function
2622 assuming the value has mode MODE. */
2624 ix86_libcall_value (mode)
2625 enum machine_mode mode;
2635 return gen_rtx_REG (mode, FIRST_SSE_REG);
2638 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2640 return gen_rtx_REG (mode, 0);
2644 return gen_rtx_REG (mode, ix86_value_regno (mode));
2647 /* Given a mode, return the register to use for a return value. */
2650 ix86_value_regno (mode)
2651 enum machine_mode mode;
2653 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2654 return FIRST_FLOAT_REG;
2655 if (mode == TImode || VECTOR_MODE_P (mode))
2656 return FIRST_SSE_REG;
2660 /* Create the va_list data type. */
2663 ix86_build_va_list ()
2665 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2667 /* For i386 we use plain pointer to argument area. */
2669 return build_pointer_type (char_type_node);
2671 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2672 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2674 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2675 unsigned_type_node);
2676 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2677 unsigned_type_node);
2678 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2680 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2683 DECL_FIELD_CONTEXT (f_gpr) = record;
2684 DECL_FIELD_CONTEXT (f_fpr) = record;
2685 DECL_FIELD_CONTEXT (f_ovf) = record;
2686 DECL_FIELD_CONTEXT (f_sav) = record;
2688 TREE_CHAIN (record) = type_decl;
2689 TYPE_NAME (record) = type_decl;
2690 TYPE_FIELDS (record) = f_gpr;
2691 TREE_CHAIN (f_gpr) = f_fpr;
2692 TREE_CHAIN (f_fpr) = f_ovf;
2693 TREE_CHAIN (f_ovf) = f_sav;
2695 layout_type (record);
2697 /* The correct type is an array type of one element. */
2698 return build_array_type (record, build_index_type (size_zero_node));
2701 /* Perform any needed actions needed for a function that is receiving a
2702 variable number of arguments.
2706 MODE and TYPE are the mode and type of the current parameter.
2708 PRETEND_SIZE is a variable that should be set to the amount of stack
2709 that must be pushed by the prolog to pretend that our caller pushed
2712 Normally, this macro will push all remaining incoming registers on the
2713 stack and set PRETEND_SIZE to the length of the registers pushed. */
2716 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2717 CUMULATIVE_ARGS *cum;
2718 enum machine_mode mode;
2720 int *pretend_size ATTRIBUTE_UNUSED;
2724 CUMULATIVE_ARGS next_cum;
2725 rtx save_area = NULL_RTX, mem;
2738 /* Indicate to allocate space on the stack for varargs save area. */
2739 ix86_save_varrargs_registers = 1;
2741 fntype = TREE_TYPE (current_function_decl);
2742 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2743 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2744 != void_type_node));
2746 /* For varargs, we do not want to skip the dummy va_dcl argument.
2747 For stdargs, we do want to skip the last named argument. */
2750 function_arg_advance (&next_cum, mode, type, 1);
2753 save_area = frame_pointer_rtx;
2755 set = get_varargs_alias_set ();
2757 for (i = next_cum.regno; i < ix86_regparm; i++)
2759 mem = gen_rtx_MEM (Pmode,
2760 plus_constant (save_area, i * UNITS_PER_WORD));
2761 set_mem_alias_set (mem, set);
2762 emit_move_insn (mem, gen_rtx_REG (Pmode,
2763 x86_64_int_parameter_registers[i]));
2766 if (next_cum.sse_nregs)
2768 /* Now emit code to save SSE registers. The AX parameter contains number
2769 of SSE parameter registers used to call this function. We use
2770 sse_prologue_save insn template that produces computed jump across
2771 SSE saves. We need some preparation work to get this working. */
2773 label = gen_label_rtx ();
2774 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2776 /* Compute address to jump to :
2777 label - 5*eax + nnamed_sse_arguments*5 */
2778 tmp_reg = gen_reg_rtx (Pmode);
2779 nsse_reg = gen_reg_rtx (Pmode);
2780 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2781 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2782 gen_rtx_MULT (Pmode, nsse_reg,
2784 if (next_cum.sse_regno)
2787 gen_rtx_CONST (DImode,
2788 gen_rtx_PLUS (DImode,
2790 GEN_INT (next_cum.sse_regno * 4))));
2792 emit_move_insn (nsse_reg, label_ref);
2793 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2795 /* Compute address of memory block we save into. We always use pointer
2796 pointing 127 bytes after first byte to store - this is needed to keep
2797 instruction size limited by 4 bytes. */
2798 tmp_reg = gen_reg_rtx (Pmode);
2799 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2800 plus_constant (save_area,
2801 8 * REGPARM_MAX + 127)));
2802 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2803 set_mem_alias_set (mem, set);
2804 set_mem_align (mem, BITS_PER_WORD);
2806 /* And finally do the dirty job! */
2807 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2808 GEN_INT (next_cum.sse_regno), label));
2813 /* Implement va_start. */
2816 ix86_va_start (valist, nextarg)
2820 HOST_WIDE_INT words, n_gpr, n_fpr;
2821 tree f_gpr, f_fpr, f_ovf, f_sav;
2822 tree gpr, fpr, ovf, sav, t;
2824 /* Only 64bit target needs something special. */
2827 std_expand_builtin_va_start (valist, nextarg);
2831 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2832 f_fpr = TREE_CHAIN (f_gpr);
2833 f_ovf = TREE_CHAIN (f_fpr);
2834 f_sav = TREE_CHAIN (f_ovf);
2836 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2837 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2838 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2839 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2840 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2842 /* Count number of gp and fp argument registers used. */
2843 words = current_function_args_info.words;
2844 n_gpr = current_function_args_info.regno;
2845 n_fpr = current_function_args_info.sse_regno;
2847 if (TARGET_DEBUG_ARG)
2848 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2849 (int) words, (int) n_gpr, (int) n_fpr);
2851 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2852 build_int_2 (n_gpr * 8, 0));
2853 TREE_SIDE_EFFECTS (t) = 1;
2854 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2856 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2857 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2858 TREE_SIDE_EFFECTS (t) = 1;
2859 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2861 /* Find the overflow area. */
2862 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2864 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2865 build_int_2 (words * UNITS_PER_WORD, 0));
2866 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2867 TREE_SIDE_EFFECTS (t) = 1;
2868 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2870 /* Find the register save area.
2871 Prologue of the function save it right above stack frame. */
2872 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2873 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2874 TREE_SIDE_EFFECTS (t) = 1;
2875 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2878 /* Implement va_arg. */
2880 ix86_va_arg (valist, type)
2883 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2884 tree f_gpr, f_fpr, f_ovf, f_sav;
2885 tree gpr, fpr, ovf, sav, t;
2887 rtx lab_false, lab_over = NULL_RTX;
2892 /* Only 64bit target needs something special. */
2895 return std_expand_builtin_va_arg (valist, type);
2898 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2899 f_fpr = TREE_CHAIN (f_gpr);
2900 f_ovf = TREE_CHAIN (f_fpr);
2901 f_sav = TREE_CHAIN (f_ovf);
2903 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2904 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2905 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2906 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2907 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2909 size = int_size_in_bytes (type);
2912 /* Passed by reference. */
2914 type = build_pointer_type (type);
2915 size = int_size_in_bytes (type);
2917 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2919 container = construct_container (TYPE_MODE (type), type, 0,
2920 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2922 * Pull the value out of the saved registers ...
2925 addr_rtx = gen_reg_rtx (Pmode);
2929 rtx int_addr_rtx, sse_addr_rtx;
2930 int needed_intregs, needed_sseregs;
2933 lab_over = gen_label_rtx ();
2934 lab_false = gen_label_rtx ();
2936 examine_argument (TYPE_MODE (type), type, 0,
2937 &needed_intregs, &needed_sseregs);
2940 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2941 || TYPE_ALIGN (type) > 128);
2943 /* In case we are passing structure, verify that it is consecutive block
2944 on the register save area. If not we need to do moves. */
2945 if (!need_temp && !REG_P (container))
2947 /* Verify that all registers are strictly consecutive */
2948 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2952 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2954 rtx slot = XVECEXP (container, 0, i);
2955 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2956 || INTVAL (XEXP (slot, 1)) != i * 16)
2964 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2966 rtx slot = XVECEXP (container, 0, i);
2967 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2968 || INTVAL (XEXP (slot, 1)) != i * 8)
2975 int_addr_rtx = addr_rtx;
2976 sse_addr_rtx = addr_rtx;
2980 int_addr_rtx = gen_reg_rtx (Pmode);
2981 sse_addr_rtx = gen_reg_rtx (Pmode);
2983 /* First ensure that we fit completely in registers. */
2986 emit_cmp_and_jump_insns (expand_expr
2987 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2988 GEN_INT ((REGPARM_MAX - needed_intregs +
2989 1) * 8), GE, const1_rtx, SImode,
2994 emit_cmp_and_jump_insns (expand_expr
2995 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2996 GEN_INT ((SSE_REGPARM_MAX -
2997 needed_sseregs + 1) * 16 +
2998 REGPARM_MAX * 8), GE, const1_rtx,
2999 SImode, 1, lab_false);
3002 /* Compute index to start of area used for integer regs. */
3005 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3006 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3007 if (r != int_addr_rtx)
3008 emit_move_insn (int_addr_rtx, r);
3012 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3013 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3014 if (r != sse_addr_rtx)
3015 emit_move_insn (sse_addr_rtx, r);
3022 /* Never use the memory itself, as it has the alias set. */
3023 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3024 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3025 set_mem_alias_set (mem, get_varargs_alias_set ());
3026 set_mem_align (mem, BITS_PER_UNIT);
3028 for (i = 0; i < XVECLEN (container, 0); i++)
3030 rtx slot = XVECEXP (container, 0, i);
3031 rtx reg = XEXP (slot, 0);
3032 enum machine_mode mode = GET_MODE (reg);
3038 if (SSE_REGNO_P (REGNO (reg)))
3040 src_addr = sse_addr_rtx;
3041 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3045 src_addr = int_addr_rtx;
3046 src_offset = REGNO (reg) * 8;
3048 src_mem = gen_rtx_MEM (mode, src_addr);
3049 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3050 src_mem = adjust_address (src_mem, mode, src_offset);
3051 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3052 emit_move_insn (dest_mem, src_mem);
3059 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3060 build_int_2 (needed_intregs * 8, 0));
3061 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3062 TREE_SIDE_EFFECTS (t) = 1;
3063 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3068 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3069 build_int_2 (needed_sseregs * 16, 0));
3070 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3071 TREE_SIDE_EFFECTS (t) = 1;
3072 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3075 emit_jump_insn (gen_jump (lab_over));
3077 emit_label (lab_false);
3080 /* ... otherwise out of the overflow area. */
3082 /* Care for on-stack alignment if needed. */
3083 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3087 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3088 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3089 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3093 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3095 emit_move_insn (addr_rtx, r);
3098 build (PLUS_EXPR, TREE_TYPE (t), t,
3099 build_int_2 (rsize * UNITS_PER_WORD, 0));
3100 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3101 TREE_SIDE_EFFECTS (t) = 1;
3102 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3105 emit_label (lab_over);
3109 r = gen_rtx_MEM (Pmode, addr_rtx);
3110 set_mem_alias_set (r, get_varargs_alias_set ());
3111 emit_move_insn (addr_rtx, r);
3117 /* Return nonzero if OP is either a i387 or SSE fp register. */
3119 any_fp_register_operand (op, mode)
3121 enum machine_mode mode ATTRIBUTE_UNUSED;
3123 return ANY_FP_REG_P (op);
3126 /* Return nonzero if OP is an i387 fp register. */
3128 fp_register_operand (op, mode)
3130 enum machine_mode mode ATTRIBUTE_UNUSED;
3132 return FP_REG_P (op);
3135 /* Return nonzero if OP is a non-fp register_operand. */
3137 register_and_not_any_fp_reg_operand (op, mode)
3139 enum machine_mode mode;
3141 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3144 /* Return nonzero if OP is a register operand other than an
3145 i387 fp register. */
3147 register_and_not_fp_reg_operand (op, mode)
3149 enum machine_mode mode;
3151 return register_operand (op, mode) && !FP_REG_P (op);
3154 /* Return nonzero if OP is general operand representable on x86_64. */
3157 x86_64_general_operand (op, mode)
3159 enum machine_mode mode;
3162 return general_operand (op, mode);
3163 if (nonimmediate_operand (op, mode))
3165 return x86_64_sign_extended_value (op);
3168 /* Return nonzero if OP is general operand representable on x86_64
3169 as either sign extended or zero extended constant. */
3172 x86_64_szext_general_operand (op, mode)
3174 enum machine_mode mode;
3177 return general_operand (op, mode);
3178 if (nonimmediate_operand (op, mode))
3180 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3183 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3186 x86_64_nonmemory_operand (op, mode)
3188 enum machine_mode mode;
3191 return nonmemory_operand (op, mode);
3192 if (register_operand (op, mode))
3194 return x86_64_sign_extended_value (op);
3197 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3200 x86_64_movabs_operand (op, mode)
3202 enum machine_mode mode;
3204 if (!TARGET_64BIT || !flag_pic)
3205 return nonmemory_operand (op, mode);
3206 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3208 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3213 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3216 x86_64_szext_nonmemory_operand (op, mode)
3218 enum machine_mode mode;
3221 return nonmemory_operand (op, mode);
3222 if (register_operand (op, mode))
3224 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3227 /* Return nonzero if OP is immediate operand representable on x86_64. */
3230 x86_64_immediate_operand (op, mode)
3232 enum machine_mode mode;
3235 return immediate_operand (op, mode);
3236 return x86_64_sign_extended_value (op);
3239 /* Return nonzero if OP is immediate operand representable on x86_64. */
3242 x86_64_zext_immediate_operand (op, mode)
3244 enum machine_mode mode ATTRIBUTE_UNUSED;
3246 return x86_64_zero_extended_value (op);
3249 /* Return nonzero if OP is (const_int 1), else return zero. */
3252 const_int_1_operand (op, mode)
3254 enum machine_mode mode ATTRIBUTE_UNUSED;
3256 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3259 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3260 for shift & compare patterns, as shifting by 0 does not change flags),
3261 else return zero. */
3264 const_int_1_31_operand (op, mode)
3266 enum machine_mode mode ATTRIBUTE_UNUSED;
3268 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3271 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3272 reference and a constant. */
3275 symbolic_operand (op, mode)
3277 enum machine_mode mode ATTRIBUTE_UNUSED;
3279 switch (GET_CODE (op))
3287 if (GET_CODE (op) == SYMBOL_REF
3288 || GET_CODE (op) == LABEL_REF
3289 || (GET_CODE (op) == UNSPEC
3290 && (XINT (op, 1) == UNSPEC_GOT
3291 || XINT (op, 1) == UNSPEC_GOTOFF
3292 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3294 if (GET_CODE (op) != PLUS
3295 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3299 if (GET_CODE (op) == SYMBOL_REF
3300 || GET_CODE (op) == LABEL_REF)
3302 /* Only @GOTOFF gets offsets. */
3303 if (GET_CODE (op) != UNSPEC
3304 || XINT (op, 1) != UNSPEC_GOTOFF)
3307 op = XVECEXP (op, 0, 0);
3308 if (GET_CODE (op) == SYMBOL_REF
3309 || GET_CODE (op) == LABEL_REF)
3318 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3321 pic_symbolic_operand (op, mode)
3323 enum machine_mode mode ATTRIBUTE_UNUSED;
3325 if (GET_CODE (op) != CONST)
3330 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3335 if (GET_CODE (op) == UNSPEC)
3337 if (GET_CODE (op) != PLUS
3338 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3341 if (GET_CODE (op) == UNSPEC)
3347 /* Return true if OP is a symbolic operand that resolves locally. */
3350 local_symbolic_operand (op, mode)
3352 enum machine_mode mode ATTRIBUTE_UNUSED;
3354 if (GET_CODE (op) == CONST
3355 && GET_CODE (XEXP (op, 0)) == PLUS
3356 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3357 op = XEXP (XEXP (op, 0), 0);
3359 if (GET_CODE (op) == LABEL_REF)
3362 if (GET_CODE (op) != SYMBOL_REF)
3365 /* These we've been told are local by varasm and encode_section_info
3367 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3370 /* There is, however, a not insubstantial body of code in the rest of
3371 the compiler that assumes it can just stick the results of
3372 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3373 /* ??? This is a hack. Should update the body of the compiler to
3374 always create a DECL an invoke targetm.encode_section_info. */
3375 if (strncmp (XSTR (op, 0), internal_label_prefix,
3376 internal_label_prefix_len) == 0)
3382 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3385 tls_symbolic_operand (op, mode)
3387 enum machine_mode mode ATTRIBUTE_UNUSED;
3389 const char *symbol_str;
3391 if (GET_CODE (op) != SYMBOL_REF)
3393 symbol_str = XSTR (op, 0);
3395 if (symbol_str[0] != '%')
3397 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3401 tls_symbolic_operand_1 (op, kind)
3403 enum tls_model kind;
3405 const char *symbol_str;
3407 if (GET_CODE (op) != SYMBOL_REF)
3409 symbol_str = XSTR (op, 0);
3411 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3415 global_dynamic_symbolic_operand (op, mode)
3417 enum machine_mode mode ATTRIBUTE_UNUSED;
3419 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3423 local_dynamic_symbolic_operand (op, mode)
3425 enum machine_mode mode ATTRIBUTE_UNUSED;
3427 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3431 initial_exec_symbolic_operand (op, mode)
3433 enum machine_mode mode ATTRIBUTE_UNUSED;
3435 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3439 local_exec_symbolic_operand (op, mode)
3441 enum machine_mode mode ATTRIBUTE_UNUSED;
3443 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3446 /* Test for a valid operand for a call instruction. Don't allow the
3447 arg pointer register or virtual regs since they may decay into
3448 reg + const, which the patterns can't handle. */
3451 call_insn_operand (op, mode)
3453 enum machine_mode mode ATTRIBUTE_UNUSED;
3455 /* Disallow indirect through a virtual register. This leads to
3456 compiler aborts when trying to eliminate them. */
3457 if (GET_CODE (op) == REG
3458 && (op == arg_pointer_rtx
3459 || op == frame_pointer_rtx
3460 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3461 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3464 /* Disallow `call 1234'. Due to varying assembler lameness this
3465 gets either rejected or translated to `call .+1234'. */
3466 if (GET_CODE (op) == CONST_INT)
3469 /* Explicitly allow SYMBOL_REF even if pic. */
3470 if (GET_CODE (op) == SYMBOL_REF)
3473 /* Otherwise we can allow any general_operand in the address. */
3474 return general_operand (op, Pmode);
3477 /* Test for a valid operand for a call instruction. Don't allow the
3478 arg pointer register or virtual regs since they may decay into
3479 reg + const, which the patterns can't handle. */
3482 sibcall_insn_operand (op, mode)
3484 enum machine_mode mode ATTRIBUTE_UNUSED;
3486 /* Disallow indirect through a virtual register. This leads to
3487 compiler aborts when trying to eliminate them. */
3488 if (GET_CODE (op) == REG
3489 && (op == arg_pointer_rtx
3490 || op == frame_pointer_rtx
3491 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3492 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3495 /* Explicitly allow SYMBOL_REF even if pic. */
3496 if (GET_CODE (op) == SYMBOL_REF)
3499 /* Otherwise we can only allow register operands. */
3500 return register_operand (op, Pmode);
3504 constant_call_address_operand (op, mode)
3506 enum machine_mode mode ATTRIBUTE_UNUSED;
3508 if (GET_CODE (op) == CONST
3509 && GET_CODE (XEXP (op, 0)) == PLUS
3510 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3511 op = XEXP (XEXP (op, 0), 0);
3512 return GET_CODE (op) == SYMBOL_REF;
3515 /* Match exactly zero and one. */
3518 const0_operand (op, mode)
3520 enum machine_mode mode;
3522 return op == CONST0_RTX (mode);
3526 const1_operand (op, mode)
3528 enum machine_mode mode ATTRIBUTE_UNUSED;
3530 return op == const1_rtx;
3533 /* Match 2, 4, or 8. Used for leal multiplicands. */
3536 const248_operand (op, mode)
3538 enum machine_mode mode ATTRIBUTE_UNUSED;
3540 return (GET_CODE (op) == CONST_INT
3541 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3544 /* True if this is a constant appropriate for an increment or decrement. */
3547 incdec_operand (op, mode)
3549 enum machine_mode mode ATTRIBUTE_UNUSED;
3551 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3552 registers, since carry flag is not set. */
3553 if (TARGET_PENTIUM4 && !optimize_size)
3555 return op == const1_rtx || op == constm1_rtx;
3558 /* Return nonzero if OP is acceptable as operand of DImode shift
3562 shiftdi_operand (op, mode)
3564 enum machine_mode mode ATTRIBUTE_UNUSED;
3567 return nonimmediate_operand (op, mode);
3569 return register_operand (op, mode);
3572 /* Return false if this is the stack pointer, or any other fake
3573 register eliminable to the stack pointer. Otherwise, this is
3576 This is used to prevent esp from being used as an index reg.
3577 Which would only happen in pathological cases. */
3580 reg_no_sp_operand (op, mode)
3582 enum machine_mode mode;
3585 if (GET_CODE (t) == SUBREG)
3587 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3590 return register_operand (op, mode);
3594 mmx_reg_operand (op, mode)
3596 enum machine_mode mode ATTRIBUTE_UNUSED;
3598 return MMX_REG_P (op);
3601 /* Return false if this is any eliminable register. Otherwise
3605 general_no_elim_operand (op, mode)
3607 enum machine_mode mode;
3610 if (GET_CODE (t) == SUBREG)
3612 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3613 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3614 || t == virtual_stack_dynamic_rtx)
3617 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3618 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3621 return general_operand (op, mode);
3624 /* Return false if this is any eliminable register. Otherwise
3625 register_operand or const_int. */
3628 nonmemory_no_elim_operand (op, mode)
3630 enum machine_mode mode;
3633 if (GET_CODE (t) == SUBREG)
3635 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3636 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3637 || t == virtual_stack_dynamic_rtx)
3640 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3643 /* Return false if this is any eliminable register or stack register,
3644 otherwise work like register_operand. */
3647 index_register_operand (op, mode)
3649 enum machine_mode mode;
3652 if (GET_CODE (t) == SUBREG)
3656 if (t == arg_pointer_rtx
3657 || t == frame_pointer_rtx
3658 || t == virtual_incoming_args_rtx
3659 || t == virtual_stack_vars_rtx
3660 || t == virtual_stack_dynamic_rtx
3661 || REGNO (t) == STACK_POINTER_REGNUM)
3664 return general_operand (op, mode);
3667 /* Return true if op is a Q_REGS class register. */
3670 q_regs_operand (op, mode)
3672 enum machine_mode mode;
3674 if (mode != VOIDmode && GET_MODE (op) != mode)
3676 if (GET_CODE (op) == SUBREG)
3677 op = SUBREG_REG (op);
3678 return ANY_QI_REG_P (op);
3681 /* Return true if op is an flags register. */
3684 flags_reg_operand (op, mode)
3686 enum machine_mode mode;
3688 if (mode != VOIDmode && GET_MODE (op) != mode)
3690 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3693 /* Return true if op is a NON_Q_REGS class register. */
3696 non_q_regs_operand (op, mode)
3698 enum machine_mode mode;
3700 if (mode != VOIDmode && GET_MODE (op) != mode)
3702 if (GET_CODE (op) == SUBREG)
3703 op = SUBREG_REG (op);
3704 return NON_QI_REG_P (op);
3708 zero_extended_scalar_load_operand (op, mode)
3710 enum machine_mode mode ATTRIBUTE_UNUSED;
3713 if (GET_CODE (op) != MEM)
3715 op = maybe_get_pool_constant (op);
3718 if (GET_CODE (op) != CONST_VECTOR)
3721 (GET_MODE_SIZE (GET_MODE (op)) /
3722 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3723 for (n_elts--; n_elts > 0; n_elts--)
3725 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3726 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3732 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3735 sse_comparison_operator (op, mode)
3737 enum machine_mode mode ATTRIBUTE_UNUSED;
3739 enum rtx_code code = GET_CODE (op);
3742 /* Operations supported directly. */
3752 /* These are equivalent to ones above in non-IEEE comparisons. */
3759 return !TARGET_IEEE_FP;
3764 /* Return 1 if OP is a valid comparison operator in valid mode. */
3766 ix86_comparison_operator (op, mode)
3768 enum machine_mode mode;
3770 enum machine_mode inmode;
3771 enum rtx_code code = GET_CODE (op);
3772 if (mode != VOIDmode && GET_MODE (op) != mode)
3774 if (GET_RTX_CLASS (code) != '<')
3776 inmode = GET_MODE (XEXP (op, 0));
3778 if (inmode == CCFPmode || inmode == CCFPUmode)
3780 enum rtx_code second_code, bypass_code;
3781 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3782 return (bypass_code == NIL && second_code == NIL);
3789 if (inmode == CCmode || inmode == CCGCmode
3790 || inmode == CCGOCmode || inmode == CCNOmode)
3793 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3794 if (inmode == CCmode)
3798 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3806 /* Return 1 if OP is a valid comparison operator testing carry flag
3809 ix86_carry_flag_operator (op, mode)
3811 enum machine_mode mode;
3813 enum machine_mode inmode;
3814 enum rtx_code code = GET_CODE (op);
3816 if (mode != VOIDmode && GET_MODE (op) != mode)
3818 if (GET_RTX_CLASS (code) != '<')
3820 inmode = GET_MODE (XEXP (op, 0));
3821 if (GET_CODE (XEXP (op, 0)) != REG
3822 || REGNO (XEXP (op, 0)) != 17
3823 || XEXP (op, 1) != const0_rtx)
3826 if (inmode == CCFPmode || inmode == CCFPUmode)
3828 enum rtx_code second_code, bypass_code;
3830 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3831 if (bypass_code != NIL || second_code != NIL)
3833 code = ix86_fp_compare_code_to_integer (code);
3835 else if (inmode != CCmode)
3840 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3843 fcmov_comparison_operator (op, mode)
3845 enum machine_mode mode;
3847 enum machine_mode inmode;
3848 enum rtx_code code = GET_CODE (op);
3850 if (mode != VOIDmode && GET_MODE (op) != mode)
3852 if (GET_RTX_CLASS (code) != '<')
3854 inmode = GET_MODE (XEXP (op, 0));
3855 if (inmode == CCFPmode || inmode == CCFPUmode)
3857 enum rtx_code second_code, bypass_code;
3859 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3860 if (bypass_code != NIL || second_code != NIL)
3862 code = ix86_fp_compare_code_to_integer (code);
3864 /* i387 supports just limited amount of conditional codes. */
3867 case LTU: case GTU: case LEU: case GEU:
3868 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3871 case ORDERED: case UNORDERED:
3879 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3882 promotable_binary_operator (op, mode)
3884 enum machine_mode mode ATTRIBUTE_UNUSED;
3886 switch (GET_CODE (op))
3889 /* Modern CPUs have same latency for HImode and SImode multiply,
3890 but 386 and 486 do HImode multiply faster. */
3891 return ix86_cpu > PROCESSOR_I486;
3903 /* Nearly general operand, but accept any const_double, since we wish
3904 to be able to drop them into memory rather than have them get pulled
3908 cmp_fp_expander_operand (op, mode)
3910 enum machine_mode mode;
3912 if (mode != VOIDmode && mode != GET_MODE (op))
3914 if (GET_CODE (op) == CONST_DOUBLE)
3916 return general_operand (op, mode);
3919 /* Match an SI or HImode register for a zero_extract. */
3922 ext_register_operand (op, mode)
3924 enum machine_mode mode ATTRIBUTE_UNUSED;
3927 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3928 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3931 if (!register_operand (op, VOIDmode))
3934 /* Be careful to accept only registers having upper parts. */
3935 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3936 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3939 /* Return 1 if this is a valid binary floating-point operation.
3940 OP is the expression matched, and MODE is its mode. */
3943 binary_fp_operator (op, mode)
3945 enum machine_mode mode;
3947 if (mode != VOIDmode && mode != GET_MODE (op))
3950 switch (GET_CODE (op))
3956 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3964 mult_operator (op, mode)
3966 enum machine_mode mode ATTRIBUTE_UNUSED;
3968 return GET_CODE (op) == MULT;
3972 div_operator (op, mode)
3974 enum machine_mode mode ATTRIBUTE_UNUSED;
3976 return GET_CODE (op) == DIV;
3980 arith_or_logical_operator (op, mode)
3982 enum machine_mode mode;
3984 return ((mode == VOIDmode || GET_MODE (op) == mode)
3985 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3986 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3989 /* Returns 1 if OP is memory operand with a displacement. */
3992 memory_displacement_operand (op, mode)
3994 enum machine_mode mode;
3996 struct ix86_address parts;
3998 if (! memory_operand (op, mode))
4001 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4004 return parts.disp != NULL_RTX;
4007 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4008 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4010 ??? It seems likely that this will only work because cmpsi is an
4011 expander, and no actual insns use this. */
4014 cmpsi_operand (op, mode)
4016 enum machine_mode mode;
4018 if (nonimmediate_operand (op, mode))
4021 if (GET_CODE (op) == AND
4022 && GET_MODE (op) == SImode
4023 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4024 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4025 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4026 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4027 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4028 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4034 /* Returns 1 if OP is memory operand that can not be represented by the
4038 long_memory_operand (op, mode)
4040 enum machine_mode mode;
4042 if (! memory_operand (op, mode))
4045 return memory_address_length (op) != 0;
4048 /* Return nonzero if the rtx is known aligned. */
4051 aligned_operand (op, mode)
4053 enum machine_mode mode;
4055 struct ix86_address parts;
4057 if (!general_operand (op, mode))
4060 /* Registers and immediate operands are always "aligned". */
4061 if (GET_CODE (op) != MEM)
4064 /* Don't even try to do any aligned optimizations with volatiles. */
4065 if (MEM_VOLATILE_P (op))
4070 /* Pushes and pops are only valid on the stack pointer. */
4071 if (GET_CODE (op) == PRE_DEC
4072 || GET_CODE (op) == POST_INC)
4075 /* Decode the address. */
4076 if (! ix86_decompose_address (op, &parts))
4079 if (parts.base && GET_CODE (parts.base) == SUBREG)
4080 parts.base = SUBREG_REG (parts.base);
4081 if (parts.index && GET_CODE (parts.index) == SUBREG)
4082 parts.index = SUBREG_REG (parts.index);
4084 /* Look for some component that isn't known to be aligned. */
4088 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4093 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4098 if (GET_CODE (parts.disp) != CONST_INT
4099 || (INTVAL (parts.disp) & 3) != 0)
4103 /* Didn't find one -- this must be an aligned address. */
4107 /* Return true if the constant is something that can be loaded with
4108 a special instruction. Only handle 0.0 and 1.0; others are less
4112 standard_80387_constant_p (x)
4115 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4117 /* Note that on the 80387, other constants, such as pi, that we should support
4118 too. On some machines, these are much slower to load as standard constant,
4119 than to load from doubles in memory. */
4120 if (x == CONST0_RTX (GET_MODE (x)))
4122 if (x == CONST1_RTX (GET_MODE (x)))
4127 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4130 standard_sse_constant_p (x)
4133 if (x == const0_rtx)
4135 return (x == CONST0_RTX (GET_MODE (x)));
4138 /* Returns 1 if OP contains a symbol reference */
4141 symbolic_reference_mentioned_p (op)
4144 register const char *fmt;
4147 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4150 fmt = GET_RTX_FORMAT (GET_CODE (op));
4151 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4157 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4158 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4162 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4169 /* Return 1 if it is appropriate to emit `ret' instructions in the
4170 body of a function. Do this only if the epilogue is simple, needing a
4171 couple of insns. Prior to reloading, we can't tell how many registers
4172 must be saved, so return 0 then. Return 0 if there is no frame
4173 marker to de-allocate.
4175 If NON_SAVING_SETJMP is defined and true, then it is not possible
4176 for the epilogue to be simple, so return 0. This is a special case
4177 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4178 until final, but jump_optimize may need to know sooner if a
4182 ix86_can_use_return_insn_p ()
4184 struct ix86_frame frame;
4186 #ifdef NON_SAVING_SETJMP
4187 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4191 if (! reload_completed || frame_pointer_needed)
4194 /* Don't allow more than 32 pop, since that's all we can do
4195 with one instruction. */
4196 if (current_function_pops_args
4197 && current_function_args_size >= 32768)
4200 ix86_compute_frame_layout (&frame);
4201 return frame.to_allocate == 0 && frame.nregs == 0;
4204 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4206 x86_64_sign_extended_value (value)
4209 switch (GET_CODE (value))
4211 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4212 to be at least 32 and this all acceptable constants are
4213 represented as CONST_INT. */
4215 if (HOST_BITS_PER_WIDE_INT == 32)
4219 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4220 return trunc_int_for_mode (val, SImode) == val;
4224 /* For certain code models, the symbolic references are known to fit.
4225 in CM_SMALL_PIC model we know it fits if it is local to the shared
4226 library. Don't count TLS SYMBOL_REFs here, since they should fit
4227 only if inside of UNSPEC handled below. */
4229 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4231 /* For certain code models, the code is near as well. */
4233 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4234 || ix86_cmodel == CM_KERNEL);
4236 /* We also may accept the offsetted memory references in certain special
4239 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4240 switch (XINT (XEXP (value, 0), 1))
4242 case UNSPEC_GOTPCREL:
4244 case UNSPEC_GOTNTPOFF:
4250 if (GET_CODE (XEXP (value, 0)) == PLUS)
4252 rtx op1 = XEXP (XEXP (value, 0), 0);
4253 rtx op2 = XEXP (XEXP (value, 0), 1);
4254 HOST_WIDE_INT offset;
4256 if (ix86_cmodel == CM_LARGE)
4258 if (GET_CODE (op2) != CONST_INT)
4260 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4261 switch (GET_CODE (op1))
4264 /* For CM_SMALL assume that latest object is 16MB before
4265 end of 31bits boundary. We may also accept pretty
4266 large negative constants knowing that all objects are
4267 in the positive half of address space. */
4268 if (ix86_cmodel == CM_SMALL
4269 && offset < 16*1024*1024
4270 && trunc_int_for_mode (offset, SImode) == offset)
4272 /* For CM_KERNEL we know that all object resist in the
4273 negative half of 32bits address space. We may not
4274 accept negative offsets, since they may be just off
4275 and we may accept pretty large positive ones. */
4276 if (ix86_cmodel == CM_KERNEL
4278 && trunc_int_for_mode (offset, SImode) == offset)
4282 /* These conditions are similar to SYMBOL_REF ones, just the
4283 constraints for code models differ. */
4284 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4285 && offset < 16*1024*1024
4286 && trunc_int_for_mode (offset, SImode) == offset)
4288 if (ix86_cmodel == CM_KERNEL
4290 && trunc_int_for_mode (offset, SImode) == offset)
4294 switch (XINT (op1, 1))
4299 && trunc_int_for_mode (offset, SImode) == offset)
4313 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4315 x86_64_zero_extended_value (value)
4318 switch (GET_CODE (value))
4321 if (HOST_BITS_PER_WIDE_INT == 32)
4322 return (GET_MODE (value) == VOIDmode
4323 && !CONST_DOUBLE_HIGH (value));
4327 if (HOST_BITS_PER_WIDE_INT == 32)
4328 return INTVAL (value) >= 0;
4330 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4333 /* For certain code models, the symbolic references are known to fit. */
4335 return ix86_cmodel == CM_SMALL;
4337 /* For certain code models, the code is near as well. */
4339 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4341 /* We also may accept the offsetted memory references in certain special
4344 if (GET_CODE (XEXP (value, 0)) == PLUS)
4346 rtx op1 = XEXP (XEXP (value, 0), 0);
4347 rtx op2 = XEXP (XEXP (value, 0), 1);
4349 if (ix86_cmodel == CM_LARGE)
4351 switch (GET_CODE (op1))
4355 /* For small code model we may accept pretty large positive
4356 offsets, since one bit is available for free. Negative
4357 offsets are limited by the size of NULL pointer area
4358 specified by the ABI. */
4359 if (ix86_cmodel == CM_SMALL
4360 && GET_CODE (op2) == CONST_INT
4361 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4362 && (trunc_int_for_mode (INTVAL (op2), SImode)
4365 /* ??? For the kernel, we may accept adjustment of
4366 -0x10000000, since we know that it will just convert
4367 negative address space to positive, but perhaps this
4368 is not worthwhile. */
4371 /* These conditions are similar to SYMBOL_REF ones, just the
4372 constraints for code models differ. */
4373 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4374 && GET_CODE (op2) == CONST_INT
4375 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4376 && (trunc_int_for_mode (INTVAL (op2), SImode)
4390 /* Value should be nonzero if functions must have frame pointers.
4391 Zero means the frame pointer need not be set up (and parms may
4392 be accessed via the stack pointer) in functions that seem suitable. */
4395 ix86_frame_pointer_required ()
4397 /* If we accessed previous frames, then the generated code expects
4398 to be able to access the saved ebp value in our frame. */
4399 if (cfun->machine->accesses_prev_frame)
4402 /* Several x86 os'es need a frame pointer for other reasons,
4403 usually pertaining to setjmp. */
4404 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4407 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4408 the frame pointer by default. Turn it back on now if we've not
4409 got a leaf function. */
4410 if (TARGET_OMIT_LEAF_FRAME_POINTER
4411 && (!current_function_is_leaf))
4414 if (current_function_profile)
4420 /* Record that the current function accesses previous call frames. */
4423 ix86_setup_frame_addresses ()
4425 cfun->machine->accesses_prev_frame = 1;
4428 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4429 # define USE_HIDDEN_LINKONCE 1
4431 # define USE_HIDDEN_LINKONCE 0
4434 static int pic_labels_used;
4436 /* Fills in the label name that should be used for a pc thunk for
4437 the given register. */
4440 get_pc_thunk_name (name, regno)
4444 if (USE_HIDDEN_LINKONCE)
4445 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4447 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4451 /* This function generates code for -fpic that loads %ebx with
4452 the return address of the caller and then returns. */
4455 ix86_asm_file_end (file)
4461 for (regno = 0; regno < 8; ++regno)
4465 if (! ((pic_labels_used >> regno) & 1))
4468 get_pc_thunk_name (name, regno);
4470 if (USE_HIDDEN_LINKONCE)
4474 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4476 TREE_PUBLIC (decl) = 1;
4477 TREE_STATIC (decl) = 1;
4478 DECL_ONE_ONLY (decl) = 1;
4480 (*targetm.asm_out.unique_section) (decl, 0);
4481 named_section (decl, NULL, 0);
4483 (*targetm.asm_out.globalize_label) (file, name);
4484 fputs ("\t.hidden\t", file);
4485 assemble_name (file, name);
4487 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4492 ASM_OUTPUT_LABEL (file, name);
4495 xops[0] = gen_rtx_REG (SImode, regno);
4496 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4497 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4498 output_asm_insn ("ret", xops);
4502 /* Emit code for the SET_GOT patterns. */
4505 output_set_got (dest)
4511 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4513 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4515 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4518 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4520 output_asm_insn ("call\t%a2", xops);
4523 /* Output the "canonical" label name ("Lxx$pb") here too. This
4524 is what will be referred to by the Mach-O PIC subsystem. */
4525 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4527 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4528 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4531 output_asm_insn ("pop{l}\t%0", xops);
4536 get_pc_thunk_name (name, REGNO (dest));
4537 pic_labels_used |= 1 << REGNO (dest);
4539 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4540 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4541 output_asm_insn ("call\t%X2", xops);
4544 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4545 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4546 else if (!TARGET_MACHO)
4547 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4552 /* Generate an "push" pattern for input ARG. */
4558 return gen_rtx_SET (VOIDmode,
4560 gen_rtx_PRE_DEC (Pmode,
4561 stack_pointer_rtx)),
4565 /* Return >= 0 if there is an unused call-clobbered register available
4566 for the entire function. */
4569 ix86_select_alt_pic_regnum ()
4571 if (current_function_is_leaf && !current_function_profile)
4574 for (i = 2; i >= 0; --i)
4575 if (!regs_ever_live[i])
4579 return INVALID_REGNUM;
4582 /* Return 1 if we need to save REGNO. */
4584 ix86_save_reg (regno, maybe_eh_return)
4586 int maybe_eh_return;
4588 if (pic_offset_table_rtx
4589 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4590 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4591 || current_function_profile
4592 || current_function_calls_eh_return))
4594 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4599 if (current_function_calls_eh_return && maybe_eh_return)
4604 unsigned test = EH_RETURN_DATA_REGNO (i);
4605 if (test == INVALID_REGNUM)
4612 return (regs_ever_live[regno]
4613 && !call_used_regs[regno]
4614 && !fixed_regs[regno]
4615 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4618 /* Return number of registers to be saved on the stack. */
4626 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4627 if (ix86_save_reg (regno, true))
4632 /* Return the offset between two registers, one to be eliminated, and the other
4633 its replacement, at the start of a routine. */
4636 ix86_initial_elimination_offset (from, to)
4640 struct ix86_frame frame;
4641 ix86_compute_frame_layout (&frame);
4643 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4644 return frame.hard_frame_pointer_offset;
4645 else if (from == FRAME_POINTER_REGNUM
4646 && to == HARD_FRAME_POINTER_REGNUM)
4647 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4650 if (to != STACK_POINTER_REGNUM)
4652 else if (from == ARG_POINTER_REGNUM)
4653 return frame.stack_pointer_offset;
4654 else if (from != FRAME_POINTER_REGNUM)
4657 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4661 /* Fill structure ix86_frame about frame of currently computed function. */
4664 ix86_compute_frame_layout (frame)
4665 struct ix86_frame *frame;
4667 HOST_WIDE_INT total_size;
4668 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4670 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4671 HOST_WIDE_INT size = get_frame_size ();
4673 frame->nregs = ix86_nsaved_regs ();
4676 /* Skip return address and saved base pointer. */
4677 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4679 frame->hard_frame_pointer_offset = offset;
4681 /* Do some sanity checking of stack_alignment_needed and
4682 preferred_alignment, since i386 port is the only using those features
4683 that may break easily. */
4685 if (size && !stack_alignment_needed)
4687 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4689 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4691 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4694 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4695 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4697 /* Register save area */
4698 offset += frame->nregs * UNITS_PER_WORD;
4701 if (ix86_save_varrargs_registers)
4703 offset += X86_64_VARARGS_SIZE;
4704 frame->va_arg_size = X86_64_VARARGS_SIZE;
4707 frame->va_arg_size = 0;
4709 /* Align start of frame for local function. */
4710 frame->padding1 = ((offset + stack_alignment_needed - 1)
4711 & -stack_alignment_needed) - offset;
4713 offset += frame->padding1;
4715 /* Frame pointer points here. */
4716 frame->frame_pointer_offset = offset;
4720 /* Add outgoing arguments area. Can be skipped if we eliminated
4721 all the function calls as dead code. */
4722 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4724 offset += current_function_outgoing_args_size;
4725 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4728 frame->outgoing_arguments_size = 0;
4730 /* Align stack boundary. Only needed if we're calling another function
4732 if (!current_function_is_leaf || current_function_calls_alloca)
4733 frame->padding2 = ((offset + preferred_alignment - 1)
4734 & -preferred_alignment) - offset;
4736 frame->padding2 = 0;
4738 offset += frame->padding2;
4740 /* We've reached end of stack frame. */
4741 frame->stack_pointer_offset = offset;
4743 /* Size prologue needs to allocate. */
4744 frame->to_allocate =
4745 (size + frame->padding1 + frame->padding2
4746 + frame->outgoing_arguments_size + frame->va_arg_size);
4748 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4749 && current_function_is_leaf)
4751 frame->red_zone_size = frame->to_allocate;
4752 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4753 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4756 frame->red_zone_size = 0;
4757 frame->to_allocate -= frame->red_zone_size;
4758 frame->stack_pointer_offset -= frame->red_zone_size;
4760 fprintf (stderr, "nregs: %i\n", frame->nregs);
4761 fprintf (stderr, "size: %i\n", size);
4762 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4763 fprintf (stderr, "padding1: %i\n", frame->padding1);
4764 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4765 fprintf (stderr, "padding2: %i\n", frame->padding2);
4766 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4767 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4768 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4769 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4770 frame->hard_frame_pointer_offset);
4771 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4775 /* Emit code to save registers in the prologue. */
4778 ix86_emit_save_regs ()
4783 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4784 if (ix86_save_reg (regno, true))
4786 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4787 RTX_FRAME_RELATED_P (insn) = 1;
4791 /* Emit code to save registers using MOV insns. First register
4792 is restored from POINTER + OFFSET. */
4794 ix86_emit_save_regs_using_mov (pointer, offset)
4796 HOST_WIDE_INT offset;
4801 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4802 if (ix86_save_reg (regno, true))
4804 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4806 gen_rtx_REG (Pmode, regno));
4807 RTX_FRAME_RELATED_P (insn) = 1;
4808 offset += UNITS_PER_WORD;
4812 /* Expand the prologue into a bunch of separate insns. */
4815 ix86_expand_prologue ()
4819 struct ix86_frame frame;
4821 HOST_WIDE_INT allocate;
4823 ix86_compute_frame_layout (&frame);
4826 int count = frame.nregs;
4828 /* The fast prologue uses move instead of push to save registers. This
4829 is significantly longer, but also executes faster as modern hardware
4830 can execute the moves in parallel, but can't do that for push/pop.
4832 Be careful about choosing what prologue to emit: When function takes
4833 many instructions to execute we may use slow version as well as in
4834 case function is known to be outside hot spot (this is known with
4835 feedback only). Weight the size of function by number of registers
4836 to save as it is cheap to use one or two push instructions but very
4837 slow to use many of them. */
4839 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4840 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4841 || (flag_branch_probabilities
4842 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4843 use_fast_prologue_epilogue = 0;
4845 use_fast_prologue_epilogue = !expensive_function_p (count);
4846 if (TARGET_PROLOGUE_USING_MOVE)
4847 use_mov = use_fast_prologue_epilogue;
4850 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4851 slower on all targets. Also sdb doesn't like it. */
4853 if (frame_pointer_needed)
4855 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4856 RTX_FRAME_RELATED_P (insn) = 1;
4858 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4859 RTX_FRAME_RELATED_P (insn) = 1;
4862 allocate = frame.to_allocate;
4863 /* In case we are dealing only with single register and empty frame,
4864 push is equivalent of the mov+add sequence. */
4865 if (allocate == 0 && frame.nregs <= 1)
4869 ix86_emit_save_regs ();
4871 allocate += frame.nregs * UNITS_PER_WORD;
4875 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4877 insn = emit_insn (gen_pro_epilogue_adjust_stack
4878 (stack_pointer_rtx, stack_pointer_rtx,
4879 GEN_INT (-allocate)));
4880 RTX_FRAME_RELATED_P (insn) = 1;
4884 /* ??? Is this only valid for Win32? */
4891 arg0 = gen_rtx_REG (SImode, 0);
4892 emit_move_insn (arg0, GEN_INT (allocate));
4894 sym = gen_rtx_MEM (FUNCTION_MODE,
4895 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4896 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4898 CALL_INSN_FUNCTION_USAGE (insn)
4899 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4900 CALL_INSN_FUNCTION_USAGE (insn));
4902 /* Don't allow scheduling pass to move insns across __alloca
4904 emit_insn (gen_blockage (const0_rtx));
4908 if (!frame_pointer_needed || !frame.to_allocate)
4909 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4911 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4912 -frame.nregs * UNITS_PER_WORD);
4915 #ifdef SUBTARGET_PROLOGUE
4919 pic_reg_used = false;
4920 if (pic_offset_table_rtx
4921 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4922 || current_function_profile))
4924 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4926 if (alt_pic_reg_used != INVALID_REGNUM)
4927 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4929 pic_reg_used = true;
4934 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4936 /* Even with accurate pre-reload life analysis, we can wind up
4937 deleting all references to the pic register after reload.
4938 Consider if cross-jumping unifies two sides of a branch
4939 controlled by a comparison vs the only read from a global.
4940 In which case, allow the set_got to be deleted, though we're
4941 too late to do anything about the ebx save in the prologue. */
4942 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4945 /* Prevent function calls from be scheduled before the call to mcount.
4946 In the pic_reg_used case, make sure that the got load isn't deleted. */
4947 if (current_function_profile)
4948 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4951 /* Emit code to restore saved registers using MOV insns. First register
4952 is restored from POINTER + OFFSET. */
4954 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4957 int maybe_eh_return;
4961 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4962 if (ix86_save_reg (regno, maybe_eh_return))
4964 emit_move_insn (gen_rtx_REG (Pmode, regno),
4965 adjust_address (gen_rtx_MEM (Pmode, pointer),
4967 offset += UNITS_PER_WORD;
4971 /* Restore function stack, frame, and registers. */
4974 ix86_expand_epilogue (style)
4978 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4979 struct ix86_frame frame;
4980 HOST_WIDE_INT offset;
4982 ix86_compute_frame_layout (&frame);
4984 /* Calculate start of saved registers relative to ebp. Special care
4985 must be taken for the normal return case of a function using
4986 eh_return: the eax and edx registers are marked as saved, but not
4987 restored along this path. */
4988 offset = frame.nregs;
4989 if (current_function_calls_eh_return && style != 2)
4991 offset *= -UNITS_PER_WORD;
4993 /* If we're only restoring one register and sp is not valid then
4994 using a move instruction to restore the register since it's
4995 less work than reloading sp and popping the register.
4997 The default code result in stack adjustment using add/lea instruction,
4998 while this code results in LEAVE instruction (or discrete equivalent),
4999 so it is profitable in some other cases as well. Especially when there
5000 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5001 and there is exactly one register to pop. This heuristic may need some
5002 tuning in future. */
5003 if ((!sp_valid && frame.nregs <= 1)
5004 || (TARGET_EPILOGUE_USING_MOVE
5005 && use_fast_prologue_epilogue
5006 && (frame.nregs > 1 || frame.to_allocate))
5007 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5008 || (frame_pointer_needed && TARGET_USE_LEAVE
5009 && use_fast_prologue_epilogue && frame.nregs == 1)
5010 || current_function_calls_eh_return)
5012 /* Restore registers. We can use ebp or esp to address the memory
5013 locations. If both are available, default to ebp, since offsets
5014 are known to be small. Only exception is esp pointing directly to the
5015 end of block of saved registers, where we may simplify addressing
5018 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5019 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5020 frame.to_allocate, style == 2);
5022 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5023 offset, style == 2);
5025 /* eh_return epilogues need %ecx added to the stack pointer. */
5028 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5030 if (frame_pointer_needed)
5032 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5033 tmp = plus_constant (tmp, UNITS_PER_WORD);
5034 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5036 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5037 emit_move_insn (hard_frame_pointer_rtx, tmp);
5039 emit_insn (gen_pro_epilogue_adjust_stack
5040 (stack_pointer_rtx, sa, const0_rtx));
5044 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5045 tmp = plus_constant (tmp, (frame.to_allocate
5046 + frame.nregs * UNITS_PER_WORD));
5047 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5050 else if (!frame_pointer_needed)
5051 emit_insn (gen_pro_epilogue_adjust_stack
5052 (stack_pointer_rtx, stack_pointer_rtx,
5053 GEN_INT (frame.to_allocate
5054 + frame.nregs * UNITS_PER_WORD)));
5055 /* If not an i386, mov & pop is faster than "leave". */
5056 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5057 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5060 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5061 hard_frame_pointer_rtx,
5064 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5066 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5071 /* First step is to deallocate the stack frame so that we can
5072 pop the registers. */
5075 if (!frame_pointer_needed)
5077 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5078 hard_frame_pointer_rtx,
5081 else if (frame.to_allocate)
5082 emit_insn (gen_pro_epilogue_adjust_stack
5083 (stack_pointer_rtx, stack_pointer_rtx,
5084 GEN_INT (frame.to_allocate)));
5086 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5087 if (ix86_save_reg (regno, false))
5090 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5092 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5094 if (frame_pointer_needed)
5096 /* Leave results in shorter dependency chains on CPUs that are
5097 able to grok it fast. */
5098 if (TARGET_USE_LEAVE)
5099 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5100 else if (TARGET_64BIT)
5101 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5103 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5107 /* Sibcall epilogues don't want a return instruction. */
5111 if (current_function_pops_args && current_function_args_size)
5113 rtx popc = GEN_INT (current_function_pops_args);
5115 /* i386 can only pop 64K bytes. If asked to pop more, pop
5116 return address, do explicit add, and jump indirectly to the
5119 if (current_function_pops_args >= 65536)
5121 rtx ecx = gen_rtx_REG (SImode, 2);
5123 /* There are is no "pascal" calling convention in 64bit ABI. */
5127 emit_insn (gen_popsi1 (ecx));
5128 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5129 emit_jump_insn (gen_return_indirect_internal (ecx));
5132 emit_jump_insn (gen_return_pop_internal (popc));
5135 emit_jump_insn (gen_return_internal ());
5138 /* Reset from the function's potential modifications. */
5141 ix86_output_function_epilogue (file, size)
5142 FILE *file ATTRIBUTE_UNUSED;
5143 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5145 if (pic_offset_table_rtx)
5146 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5149 /* Extract the parts of an RTL expression that is a valid memory address
5150 for an instruction. Return 0 if the structure of the address is
5151 grossly off. Return -1 if the address contains ASHIFT, so it is not
5152 strictly valid, but still used for computing length of lea instruction.
5156 ix86_decompose_address (addr, out)
5158 struct ix86_address *out;
5160 rtx base = NULL_RTX;
5161 rtx index = NULL_RTX;
5162 rtx disp = NULL_RTX;
5163 HOST_WIDE_INT scale = 1;
5164 rtx scale_rtx = NULL_RTX;
5167 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5169 else if (GET_CODE (addr) == PLUS)
5171 rtx op0 = XEXP (addr, 0);
5172 rtx op1 = XEXP (addr, 1);
5173 enum rtx_code code0 = GET_CODE (op0);
5174 enum rtx_code code1 = GET_CODE (op1);
5176 if (code0 == REG || code0 == SUBREG)
5178 if (code1 == REG || code1 == SUBREG)
5179 index = op0, base = op1; /* index + base */
5181 base = op0, disp = op1; /* base + displacement */
5183 else if (code0 == MULT)
5185 index = XEXP (op0, 0);
5186 scale_rtx = XEXP (op0, 1);
5187 if (code1 == REG || code1 == SUBREG)
5188 base = op1; /* index*scale + base */
5190 disp = op1; /* index*scale + disp */
5192 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5194 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5195 scale_rtx = XEXP (XEXP (op0, 0), 1);
5196 base = XEXP (op0, 1);
5199 else if (code0 == PLUS)
5201 index = XEXP (op0, 0); /* index + base + disp */
5202 base = XEXP (op0, 1);
5208 else if (GET_CODE (addr) == MULT)
5210 index = XEXP (addr, 0); /* index*scale */
5211 scale_rtx = XEXP (addr, 1);
5213 else if (GET_CODE (addr) == ASHIFT)
5217 /* We're called for lea too, which implements ashift on occasion. */
5218 index = XEXP (addr, 0);
5219 tmp = XEXP (addr, 1);
5220 if (GET_CODE (tmp) != CONST_INT)
5222 scale = INTVAL (tmp);
5223 if ((unsigned HOST_WIDE_INT) scale > 3)
5229 disp = addr; /* displacement */
5231 /* Extract the integral value of scale. */
5234 if (GET_CODE (scale_rtx) != CONST_INT)
5236 scale = INTVAL (scale_rtx);
5239 /* Allow arg pointer and stack pointer as index if there is not scaling */
5240 if (base && index && scale == 1
5241 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5242 || index == stack_pointer_rtx))
5249 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5250 if ((base == hard_frame_pointer_rtx
5251 || base == frame_pointer_rtx
5252 || base == arg_pointer_rtx) && !disp)
5255 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5256 Avoid this by transforming to [%esi+0]. */
5257 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5258 && base && !index && !disp
5260 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5263 /* Special case: encode reg+reg instead of reg*2. */
5264 if (!base && index && scale && scale == 2)
5265 base = index, scale = 1;
5267 /* Special case: scaling cannot be encoded without base or displacement. */
5268 if (!base && !disp && index && scale != 1)
5279 /* Return cost of the memory address x.
5280 For i386, it is better to use a complex address than let gcc copy
5281 the address into a reg and make a new pseudo. But not if the address
5282 requires to two regs - that would mean more pseudos with longer
5285 ix86_address_cost (x)
5288 struct ix86_address parts;
5291 if (!ix86_decompose_address (x, &parts))
5294 if (parts.base && GET_CODE (parts.base) == SUBREG)
5295 parts.base = SUBREG_REG (parts.base);
5296 if (parts.index && GET_CODE (parts.index) == SUBREG)
5297 parts.index = SUBREG_REG (parts.index);
5299 /* More complex memory references are better. */
5300 if (parts.disp && parts.disp != const0_rtx)
5303 /* Attempt to minimize number of registers in the address. */
5305 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5307 && (!REG_P (parts.index)
5308 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5312 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5314 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5315 && parts.base != parts.index)
5318 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5319 since it's predecode logic can't detect the length of instructions
5320 and it degenerates to vector decoded. Increase cost of such
5321 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5322 to split such addresses or even refuse such addresses at all.
5324 Following addressing modes are affected:
5329 The first and last case may be avoidable by explicitly coding the zero in
5330 memory address, but I don't have AMD-K6 machine handy to check this
5334 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5335 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5336 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5342 /* If X is a machine specific address (i.e. a symbol or label being
5343 referenced as a displacement from the GOT implemented using an
5344 UNSPEC), then return the base term. Otherwise return X. */
5347 ix86_find_base_term (x)
5354 if (GET_CODE (x) != CONST)
5357 if (GET_CODE (term) == PLUS
5358 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5359 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5360 term = XEXP (term, 0);
5361 if (GET_CODE (term) != UNSPEC
5362 || XINT (term, 1) != UNSPEC_GOTPCREL)
5365 term = XVECEXP (term, 0, 0);
5367 if (GET_CODE (term) != SYMBOL_REF
5368 && GET_CODE (term) != LABEL_REF)
5374 term = i386_simplify_dwarf_addr (x);
5376 if (GET_CODE (term) != SYMBOL_REF
5377 && GET_CODE (term) != LABEL_REF)
5383 /* Determine if a given RTX is a valid constant. We already know this
5384 satisfies CONSTANT_P. */
5387 legitimate_constant_p (x)
5392 switch (GET_CODE (x))
5395 /* TLS symbols are not constant. */
5396 if (tls_symbolic_operand (x, Pmode))
5401 inner = XEXP (x, 0);
5403 /* Offsets of TLS symbols are never valid.
5404 Discourage CSE from creating them. */
5405 if (GET_CODE (inner) == PLUS
5406 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5409 /* Only some unspecs are valid as "constants". */
5410 if (GET_CODE (inner) == UNSPEC)
5411 switch (XINT (inner, 1))
5414 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5424 /* Otherwise we handle everything else in the move patterns. */
5428 /* Determine if it's legal to put X into the constant pool. This
5429 is not possible for the address of thread-local symbols, which
5430 is checked above. */
5433 ix86_cannot_force_const_mem (x)
5436 return !legitimate_constant_p (x);
5439 /* Determine if a given RTX is a valid constant address. */
5442 constant_address_p (x)
5445 switch (GET_CODE (x))
5452 return TARGET_64BIT;
5455 /* For Mach-O, really believe the CONST. */
5458 /* Otherwise fall through. */
5460 return !flag_pic && legitimate_constant_p (x);
5467 /* Nonzero if the constant value X is a legitimate general operand
5468 when generating PIC code. It is given that flag_pic is on and
5469 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5472 legitimate_pic_operand_p (x)
5477 switch (GET_CODE (x))
5480 inner = XEXP (x, 0);
5482 /* Only some unspecs are valid as "constants". */
5483 if (GET_CODE (inner) == UNSPEC)
5484 switch (XINT (inner, 1))
5487 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5495 return legitimate_pic_address_disp_p (x);
5502 /* Determine if a given CONST RTX is a valid memory displacement
5506 legitimate_pic_address_disp_p (disp)
5511 /* In 64bit mode we can allow direct addresses of symbols and labels
5512 when they are not dynamic symbols. */
5515 /* TLS references should always be enclosed in UNSPEC. */
5516 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5518 if (GET_CODE (disp) == SYMBOL_REF
5519 && ix86_cmodel == CM_SMALL_PIC
5520 && (CONSTANT_POOL_ADDRESS_P (disp)
5521 || SYMBOL_REF_FLAG (disp)))
5523 if (GET_CODE (disp) == LABEL_REF)
5525 if (GET_CODE (disp) == CONST
5526 && GET_CODE (XEXP (disp, 0)) == PLUS
5527 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5528 && ix86_cmodel == CM_SMALL_PIC
5529 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5530 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5531 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5532 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5533 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5534 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5537 if (GET_CODE (disp) != CONST)
5539 disp = XEXP (disp, 0);
5543 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5544 of GOT tables. We should not need these anyway. */
5545 if (GET_CODE (disp) != UNSPEC
5546 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5549 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5550 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5556 if (GET_CODE (disp) == PLUS)
5558 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5560 disp = XEXP (disp, 0);
5564 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5565 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5567 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5568 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5569 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5571 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5572 if (strstr (sym_name, "$pb") != 0)
5577 if (GET_CODE (disp) != UNSPEC)
5580 switch (XINT (disp, 1))
5585 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5587 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5588 case UNSPEC_GOTTPOFF:
5589 case UNSPEC_GOTNTPOFF:
5590 case UNSPEC_INDNTPOFF:
5593 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5595 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5597 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5603 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5604 memory address for an instruction. The MODE argument is the machine mode
5605 for the MEM expression that wants to use this address.
5607 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5608 convert common non-canonical forms to canonical form so that they will
5612 legitimate_address_p (mode, addr, strict)
5613 enum machine_mode mode;
5617 struct ix86_address parts;
5618 rtx base, index, disp;
5619 HOST_WIDE_INT scale;
5620 const char *reason = NULL;
5621 rtx reason_rtx = NULL_RTX;
5623 if (TARGET_DEBUG_ADDR)
5626 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5627 GET_MODE_NAME (mode), strict);
5631 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5633 if (TARGET_DEBUG_ADDR)
5634 fprintf (stderr, "Success.\n");
5638 if (ix86_decompose_address (addr, &parts) <= 0)
5640 reason = "decomposition failed";
5645 index = parts.index;
5647 scale = parts.scale;
5649 /* Validate base register.
5651 Don't allow SUBREG's here, it can lead to spill failures when the base
5652 is one word out of a two word structure, which is represented internally
5660 if (GET_CODE (base) == SUBREG)
5661 reg = SUBREG_REG (base);
5665 if (GET_CODE (reg) != REG)
5667 reason = "base is not a register";
5671 if (GET_MODE (base) != Pmode)
5673 reason = "base is not in Pmode";
5677 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5678 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5680 reason = "base is not valid";
5685 /* Validate index register.
5687 Don't allow SUBREG's here, it can lead to spill failures when the index
5688 is one word out of a two word structure, which is represented internally
5696 if (GET_CODE (index) == SUBREG)
5697 reg = SUBREG_REG (index);
5701 if (GET_CODE (reg) != REG)
5703 reason = "index is not a register";
5707 if (GET_MODE (index) != Pmode)
5709 reason = "index is not in Pmode";
5713 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5714 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5716 reason = "index is not valid";
5721 /* Validate scale factor. */
5724 reason_rtx = GEN_INT (scale);
5727 reason = "scale without index";
5731 if (scale != 2 && scale != 4 && scale != 8)
5733 reason = "scale is not a valid multiplier";
5738 /* Validate displacement. */
5743 if (GET_CODE (disp) == CONST
5744 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5745 switch (XINT (XEXP (disp, 0), 1))
5749 case UNSPEC_GOTPCREL:
5752 goto is_legitimate_pic;
5754 case UNSPEC_GOTTPOFF:
5755 case UNSPEC_GOTNTPOFF:
5756 case UNSPEC_INDNTPOFF:
5762 reason = "invalid address unspec";
5766 else if (flag_pic && (SYMBOLIC_CONST (disp)
5768 && !machopic_operand_p (disp)
5773 if (TARGET_64BIT && (index || base))
5775 /* foo@dtpoff(%rX) is ok. */
5776 if (GET_CODE (disp) != CONST
5777 || GET_CODE (XEXP (disp, 0)) != PLUS
5778 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5779 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5780 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5781 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5783 reason = "non-constant pic memory reference";
5787 else if (! legitimate_pic_address_disp_p (disp))
5789 reason = "displacement is an invalid pic construct";
5793 /* This code used to verify that a symbolic pic displacement
5794 includes the pic_offset_table_rtx register.
5796 While this is good idea, unfortunately these constructs may
5797 be created by "adds using lea" optimization for incorrect
5806 This code is nonsensical, but results in addressing
5807 GOT table with pic_offset_table_rtx base. We can't
5808 just refuse it easily, since it gets matched by
5809 "addsi3" pattern, that later gets split to lea in the
5810 case output register differs from input. While this
5811 can be handled by separate addsi pattern for this case
5812 that never results in lea, this seems to be easier and
5813 correct fix for crash to disable this test. */
5815 else if (!CONSTANT_ADDRESS_P (disp))
5817 reason = "displacement is not constant";
5820 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5822 reason = "displacement is out of range";
5825 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5827 reason = "displacement is a const_double";
5832 /* Everything looks valid. */
5833 if (TARGET_DEBUG_ADDR)
5834 fprintf (stderr, "Success.\n");
5838 if (TARGET_DEBUG_ADDR)
5840 fprintf (stderr, "Error: %s\n", reason);
5841 debug_rtx (reason_rtx);
5846 /* Return an unique alias set for the GOT. */
5848 static HOST_WIDE_INT
5849 ix86_GOT_alias_set ()
5851 static HOST_WIDE_INT set = -1;
5853 set = new_alias_set ();
5857 /* Return a legitimate reference for ORIG (an address) using the
5858 register REG. If REG is 0, a new pseudo is generated.
5860 There are two types of references that must be handled:
5862 1. Global data references must load the address from the GOT, via
5863 the PIC reg. An insn is emitted to do this load, and the reg is
5866 2. Static data references, constant pool addresses, and code labels
5867 compute the address as an offset from the GOT, whose base is in
5868 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5869 differentiate them from global data objects. The returned
5870 address is the PIC reg + an unspec constant.
5872 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5873 reg also appears in the address. */
5876 legitimize_pic_address (orig, reg)
5886 reg = gen_reg_rtx (Pmode);
5887 /* Use the generic Mach-O PIC machinery. */
5888 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5891 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5893 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5895 /* This symbol may be referenced via a displacement from the PIC
5896 base address (@GOTOFF). */
5898 if (reload_in_progress)
5899 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5900 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5901 new = gen_rtx_CONST (Pmode, new);
5902 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5906 emit_move_insn (reg, new);
5910 else if (GET_CODE (addr) == SYMBOL_REF)
5914 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5915 new = gen_rtx_CONST (Pmode, new);
5916 new = gen_rtx_MEM (Pmode, new);
5917 RTX_UNCHANGING_P (new) = 1;
5918 set_mem_alias_set (new, ix86_GOT_alias_set ());
5921 reg = gen_reg_rtx (Pmode);
5922 /* Use directly gen_movsi, otherwise the address is loaded
5923 into register for CSE. We don't want to CSE this addresses,
5924 instead we CSE addresses from the GOT table, so skip this. */
5925 emit_insn (gen_movsi (reg, new));
5930 /* This symbol must be referenced via a load from the
5931 Global Offset Table (@GOT). */
5933 if (reload_in_progress)
5934 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5935 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5936 new = gen_rtx_CONST (Pmode, new);
5937 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5938 new = gen_rtx_MEM (Pmode, new);
5939 RTX_UNCHANGING_P (new) = 1;
5940 set_mem_alias_set (new, ix86_GOT_alias_set ());
5943 reg = gen_reg_rtx (Pmode);
5944 emit_move_insn (reg, new);
5950 if (GET_CODE (addr) == CONST)
5952 addr = XEXP (addr, 0);
5954 /* We must match stuff we generate before. Assume the only
5955 unspecs that can get here are ours. Not that we could do
5956 anything with them anyway... */
5957 if (GET_CODE (addr) == UNSPEC
5958 || (GET_CODE (addr) == PLUS
5959 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5961 if (GET_CODE (addr) != PLUS)
5964 if (GET_CODE (addr) == PLUS)
5966 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5968 /* Check first to see if this is a constant offset from a @GOTOFF
5969 symbol reference. */
5970 if (local_symbolic_operand (op0, Pmode)
5971 && GET_CODE (op1) == CONST_INT)
5975 if (reload_in_progress)
5976 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5977 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5979 new = gen_rtx_PLUS (Pmode, new, op1);
5980 new = gen_rtx_CONST (Pmode, new);
5981 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5985 emit_move_insn (reg, new);
5991 if (INTVAL (op1) < -16*1024*1024
5992 || INTVAL (op1) >= 16*1024*1024)
5993 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5998 base = legitimize_pic_address (XEXP (addr, 0), reg);
5999 new = legitimize_pic_address (XEXP (addr, 1),
6000 base == reg ? NULL_RTX : reg);
6002 if (GET_CODE (new) == CONST_INT)
6003 new = plus_constant (base, INTVAL (new));
6006 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6008 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6009 new = XEXP (new, 1);
6011 new = gen_rtx_PLUS (Pmode, base, new);
6020 ix86_encode_section_info (decl, first)
6022 int first ATTRIBUTE_UNUSED;
6024 bool local_p = (*targetm.binds_local_p) (decl);
6027 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6028 if (GET_CODE (rtl) != MEM)
6030 symbol = XEXP (rtl, 0);
6031 if (GET_CODE (symbol) != SYMBOL_REF)
6034 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6035 symbol so that we may access it directly in the GOT. */
6038 SYMBOL_REF_FLAG (symbol) = local_p;
6040 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6041 "local dynamic", "initial exec" or "local exec" TLS models
6044 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6046 const char *symbol_str;
6049 enum tls_model kind = decl_tls_model (decl);
6051 if (TARGET_64BIT && ! flag_pic)
6053 /* x86-64 doesn't allow non-pic code for shared libraries,
6054 so don't generate GD/LD TLS models for non-pic code. */
6057 case TLS_MODEL_GLOBAL_DYNAMIC:
6058 kind = TLS_MODEL_INITIAL_EXEC; break;
6059 case TLS_MODEL_LOCAL_DYNAMIC:
6060 kind = TLS_MODEL_LOCAL_EXEC; break;
6066 symbol_str = XSTR (symbol, 0);
6068 if (symbol_str[0] == '%')
6070 if (symbol_str[1] == tls_model_chars[kind])
6074 len = strlen (symbol_str) + 1;
6075 newstr = alloca (len + 2);
6078 newstr[1] = tls_model_chars[kind];
6079 memcpy (newstr + 2, symbol_str, len);
6081 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6085 /* Undo the above when printing symbol names. */
6088 ix86_strip_name_encoding (str)
6098 /* Load the thread pointer into a register. */
6101 get_thread_pointer ()
6105 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6106 tp = gen_rtx_MEM (Pmode, tp);
6107 RTX_UNCHANGING_P (tp) = 1;
6108 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6109 tp = force_reg (Pmode, tp);
6114 /* Try machine-dependent ways of modifying an illegitimate address
6115 to be legitimate. If we find one, return the new, valid address.
6116 This macro is used in only one place: `memory_address' in explow.c.
6118 OLDX is the address as it was before break_out_memory_refs was called.
6119 In some cases it is useful to look at this to decide what needs to be done.
6121 MODE and WIN are passed so that this macro can use
6122 GO_IF_LEGITIMATE_ADDRESS.
6124 It is always safe for this macro to do nothing. It exists to recognize
6125 opportunities to optimize the output.
6127 For the 80386, we handle X+REG by loading X into a register R and
6128 using R+REG. R will go in a general reg and indexing will be used.
6129 However, if REG is a broken-out memory address or multiplication,
6130 nothing needs to be done because REG can certainly go in a general reg.
6132 When -fpic is used, special handling is needed for symbolic references.
6133 See comments by legitimize_pic_address in i386.c for details. */
6136 legitimize_address (x, oldx, mode)
6138 register rtx oldx ATTRIBUTE_UNUSED;
6139 enum machine_mode mode;
6144 if (TARGET_DEBUG_ADDR)
6146 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6147 GET_MODE_NAME (mode));
6151 log = tls_symbolic_operand (x, mode);
6154 rtx dest, base, off, pic;
6159 case TLS_MODEL_GLOBAL_DYNAMIC:
6160 dest = gen_reg_rtx (Pmode);
6163 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6166 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6167 insns = get_insns ();
6170 emit_libcall_block (insns, dest, rax, x);
6173 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6176 case TLS_MODEL_LOCAL_DYNAMIC:
6177 base = gen_reg_rtx (Pmode);
6180 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6183 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6184 insns = get_insns ();
6187 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6188 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6189 emit_libcall_block (insns, base, rax, note);
6192 emit_insn (gen_tls_local_dynamic_base_32 (base));
6194 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6195 off = gen_rtx_CONST (Pmode, off);
6197 return gen_rtx_PLUS (Pmode, base, off);
6199 case TLS_MODEL_INITIAL_EXEC:
6203 type = UNSPEC_GOTNTPOFF;
6207 if (reload_in_progress)
6208 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6209 pic = pic_offset_table_rtx;
6210 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6212 else if (!TARGET_GNU_TLS)
6214 pic = gen_reg_rtx (Pmode);
6215 emit_insn (gen_set_got (pic));
6216 type = UNSPEC_GOTTPOFF;
6221 type = UNSPEC_INDNTPOFF;
6224 base = get_thread_pointer ();
6226 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6227 off = gen_rtx_CONST (Pmode, off);
6229 off = gen_rtx_PLUS (Pmode, pic, off);
6230 off = gen_rtx_MEM (Pmode, off);
6231 RTX_UNCHANGING_P (off) = 1;
6232 set_mem_alias_set (off, ix86_GOT_alias_set ());
6233 dest = gen_reg_rtx (Pmode);
6235 if (TARGET_64BIT || TARGET_GNU_TLS)
6237 emit_move_insn (dest, off);
6238 return gen_rtx_PLUS (Pmode, base, dest);
6241 emit_insn (gen_subsi3 (dest, base, off));
6244 case TLS_MODEL_LOCAL_EXEC:
6245 base = get_thread_pointer ();
6247 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6248 (TARGET_64BIT || TARGET_GNU_TLS)
6249 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6250 off = gen_rtx_CONST (Pmode, off);
6252 if (TARGET_64BIT || TARGET_GNU_TLS)
6253 return gen_rtx_PLUS (Pmode, base, off);
6256 dest = gen_reg_rtx (Pmode);
6257 emit_insn (gen_subsi3 (dest, base, off));
6268 if (flag_pic && SYMBOLIC_CONST (x))
6269 return legitimize_pic_address (x, 0);
6271 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6272 if (GET_CODE (x) == ASHIFT
6273 && GET_CODE (XEXP (x, 1)) == CONST_INT
6274 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6277 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6278 GEN_INT (1 << log));
6281 if (GET_CODE (x) == PLUS)
6283 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6285 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6286 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6287 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6290 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6291 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6292 GEN_INT (1 << log));
6295 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6296 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6297 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6300 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6301 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6302 GEN_INT (1 << log));
6305 /* Put multiply first if it isn't already. */
6306 if (GET_CODE (XEXP (x, 1)) == MULT)
6308 rtx tmp = XEXP (x, 0);
6309 XEXP (x, 0) = XEXP (x, 1);
6314 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6315 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6316 created by virtual register instantiation, register elimination, and
6317 similar optimizations. */
6318 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6321 x = gen_rtx_PLUS (Pmode,
6322 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6323 XEXP (XEXP (x, 1), 0)),
6324 XEXP (XEXP (x, 1), 1));
6328 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6329 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6330 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6331 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6332 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6333 && CONSTANT_P (XEXP (x, 1)))
6336 rtx other = NULL_RTX;
6338 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6340 constant = XEXP (x, 1);
6341 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6343 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6345 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6346 other = XEXP (x, 1);
6354 x = gen_rtx_PLUS (Pmode,
6355 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6356 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6357 plus_constant (other, INTVAL (constant)));
6361 if (changed && legitimate_address_p (mode, x, FALSE))
6364 if (GET_CODE (XEXP (x, 0)) == MULT)
6367 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6370 if (GET_CODE (XEXP (x, 1)) == MULT)
6373 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6377 && GET_CODE (XEXP (x, 1)) == REG
6378 && GET_CODE (XEXP (x, 0)) == REG)
6381 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6384 x = legitimize_pic_address (x, 0);
6387 if (changed && legitimate_address_p (mode, x, FALSE))
6390 if (GET_CODE (XEXP (x, 0)) == REG)
6392 register rtx temp = gen_reg_rtx (Pmode);
6393 register rtx val = force_operand (XEXP (x, 1), temp);
6395 emit_move_insn (temp, val);
6401 else if (GET_CODE (XEXP (x, 1)) == REG)
6403 register rtx temp = gen_reg_rtx (Pmode);
6404 register rtx val = force_operand (XEXP (x, 0), temp);
6406 emit_move_insn (temp, val);
6416 /* Print an integer constant expression in assembler syntax. Addition
6417 and subtraction are the only arithmetic that may appear in these
6418 expressions. FILE is the stdio stream to write to, X is the rtx, and
6419 CODE is the operand print code from the output string. */
6422 output_pic_addr_const (file, x, code)
6429 switch (GET_CODE (x))
6439 assemble_name (file, XSTR (x, 0));
6440 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6441 fputs ("@PLT", file);
6448 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6449 assemble_name (asm_out_file, buf);
6453 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6457 /* This used to output parentheses around the expression,
6458 but that does not work on the 386 (either ATT or BSD assembler). */
6459 output_pic_addr_const (file, XEXP (x, 0), code);
6463 if (GET_MODE (x) == VOIDmode)
6465 /* We can use %d if the number is <32 bits and positive. */
6466 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6467 fprintf (file, "0x%lx%08lx",
6468 (unsigned long) CONST_DOUBLE_HIGH (x),
6469 (unsigned long) CONST_DOUBLE_LOW (x));
6471 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6474 /* We can't handle floating point constants;
6475 PRINT_OPERAND must handle them. */
6476 output_operand_lossage ("floating constant misused");
6480 /* Some assemblers need integer constants to appear first. */
6481 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6483 output_pic_addr_const (file, XEXP (x, 0), code);
6485 output_pic_addr_const (file, XEXP (x, 1), code);
6487 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6489 output_pic_addr_const (file, XEXP (x, 1), code);
6491 output_pic_addr_const (file, XEXP (x, 0), code);
6499 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6500 output_pic_addr_const (file, XEXP (x, 0), code);
6502 output_pic_addr_const (file, XEXP (x, 1), code);
6504 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6508 if (XVECLEN (x, 0) != 1)
6510 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6511 switch (XINT (x, 1))
6514 fputs ("@GOT", file);
6517 fputs ("@GOTOFF", file);
6519 case UNSPEC_GOTPCREL:
6520 fputs ("@GOTPCREL(%rip)", file);
6522 case UNSPEC_GOTTPOFF:
6523 /* FIXME: This might be @TPOFF in Sun ld too. */
6524 fputs ("@GOTTPOFF", file);
6527 fputs ("@TPOFF", file);
6531 fputs ("@TPOFF", file);
6533 fputs ("@NTPOFF", file);
6536 fputs ("@DTPOFF", file);
6538 case UNSPEC_GOTNTPOFF:
6540 fputs ("@GOTTPOFF(%rip)", file);
6542 fputs ("@GOTNTPOFF", file);
6544 case UNSPEC_INDNTPOFF:
6545 fputs ("@INDNTPOFF", file);
6548 output_operand_lossage ("invalid UNSPEC as operand");
6554 output_operand_lossage ("invalid expression as operand");
6558 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6559 We need to handle our special PIC relocations. */
6562 i386_dwarf_output_addr_const (file, x)
6567 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6571 fprintf (file, "%s", ASM_LONG);
6574 output_pic_addr_const (file, x, '\0');
6576 output_addr_const (file, x);
6580 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6581 We need to emit DTP-relative relocations. */
6584 i386_output_dwarf_dtprel (file, size, x)
6589 fputs (ASM_LONG, file);
6590 output_addr_const (file, x);
6591 fputs ("@DTPOFF", file);
6597 fputs (", 0", file);
6604 /* In the name of slightly smaller debug output, and to cater to
6605 general assembler losage, recognize PIC+GOTOFF and turn it back
6606 into a direct symbol reference. */
6609 i386_simplify_dwarf_addr (orig_x)
6614 if (GET_CODE (x) == MEM)
6619 if (GET_CODE (x) != CONST
6620 || GET_CODE (XEXP (x, 0)) != UNSPEC
6621 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6622 || GET_CODE (orig_x) != MEM)
6624 return XVECEXP (XEXP (x, 0), 0, 0);
6627 if (GET_CODE (x) != PLUS
6628 || GET_CODE (XEXP (x, 1)) != CONST)
6631 if (GET_CODE (XEXP (x, 0)) == REG
6632 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6633 /* %ebx + GOT/GOTOFF */
6635 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6637 /* %ebx + %reg * scale + GOT/GOTOFF */
6639 if (GET_CODE (XEXP (y, 0)) == REG
6640 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6642 else if (GET_CODE (XEXP (y, 1)) == REG
6643 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6647 if (GET_CODE (y) != REG
6648 && GET_CODE (y) != MULT
6649 && GET_CODE (y) != ASHIFT)
6655 x = XEXP (XEXP (x, 1), 0);
6656 if (GET_CODE (x) == UNSPEC
6657 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6658 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6661 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6662 return XVECEXP (x, 0, 0);
6665 if (GET_CODE (x) == PLUS
6666 && GET_CODE (XEXP (x, 0)) == UNSPEC
6667 && GET_CODE (XEXP (x, 1)) == CONST_INT
6668 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6669 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6670 && GET_CODE (orig_x) != MEM)))
6672 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6674 return gen_rtx_PLUS (Pmode, y, x);
6682 put_condition_code (code, mode, reverse, fp, file)
6684 enum machine_mode mode;
6690 if (mode == CCFPmode || mode == CCFPUmode)
6692 enum rtx_code second_code, bypass_code;
6693 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6694 if (bypass_code != NIL || second_code != NIL)
6696 code = ix86_fp_compare_code_to_integer (code);
6700 code = reverse_condition (code);
6711 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6716 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6717 Those same assemblers have the same but opposite losage on cmov. */
6720 suffix = fp ? "nbe" : "a";
6723 if (mode == CCNOmode || mode == CCGOCmode)
6725 else if (mode == CCmode || mode == CCGCmode)
6736 if (mode == CCNOmode || mode == CCGOCmode)
6738 else if (mode == CCmode || mode == CCGCmode)
6747 suffix = fp ? "nb" : "ae";
6750 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6760 suffix = fp ? "u" : "p";
6763 suffix = fp ? "nu" : "np";
6768 fputs (suffix, file);
6772 print_reg (x, code, file)
6777 if (REGNO (x) == ARG_POINTER_REGNUM
6778 || REGNO (x) == FRAME_POINTER_REGNUM
6779 || REGNO (x) == FLAGS_REG
6780 || REGNO (x) == FPSR_REG)
6783 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6786 if (code == 'w' || MMX_REG_P (x))
6788 else if (code == 'b')
6790 else if (code == 'k')
6792 else if (code == 'q')
6794 else if (code == 'y')
6796 else if (code == 'h')
6799 code = GET_MODE_SIZE (GET_MODE (x));
6801 /* Irritatingly, AMD extended registers use different naming convention
6802 from the normal registers. */
6803 if (REX_INT_REG_P (x))
6810 error ("extended registers have no high halves");
6813 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6816 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6819 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6822 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6825 error ("unsupported operand size for extended register");
6833 if (STACK_TOP_P (x))
6835 fputs ("st(0)", file);
6842 if (! ANY_FP_REG_P (x))
6843 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6847 fputs (hi_reg_name[REGNO (x)], file);
6850 fputs (qi_reg_name[REGNO (x)], file);
6853 fputs (qi_high_reg_name[REGNO (x)], file);
6860 /* Locate some local-dynamic symbol still in use by this function
6861 so that we can print its name in some tls_local_dynamic_base
6865 get_some_local_dynamic_name ()
6869 if (cfun->machine->some_ld_name)
6870 return cfun->machine->some_ld_name;
6872 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6874 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6875 return cfun->machine->some_ld_name;
6881 get_some_local_dynamic_name_1 (px, data)
6883 void *data ATTRIBUTE_UNUSED;
6887 if (GET_CODE (x) == SYMBOL_REF
6888 && local_dynamic_symbolic_operand (x, Pmode))
6890 cfun->machine->some_ld_name = XSTR (x, 0);
6898 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6899 C -- print opcode suffix for set/cmov insn.
6900 c -- like C, but print reversed condition
6901 F,f -- likewise, but for floating-point.
6902 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6904 R -- print the prefix for register names.
6905 z -- print the opcode suffix for the size of the current operand.
6906 * -- print a star (in certain assembler syntax)
6907 A -- print an absolute memory reference.
6908 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6909 s -- print a shift double count, followed by the assemblers argument
6911 b -- print the QImode name of the register for the indicated operand.
6912 %b0 would print %al if operands[0] is reg 0.
6913 w -- likewise, print the HImode name of the register.
6914 k -- likewise, print the SImode name of the register.
6915 q -- likewise, print the DImode name of the register.
6916 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6917 y -- print "st(0)" instead of "st" as a register.
6918 D -- print condition for SSE cmp instruction.
6919 P -- if PIC, print an @PLT suffix.
6920 X -- don't print any sort of PIC '@' suffix for a symbol.
6921 & -- print some in-use local-dynamic symbol name.
6925 print_operand (file, x, code)
6935 if (ASSEMBLER_DIALECT == ASM_ATT)
6940 assemble_name (file, get_some_local_dynamic_name ());
6944 if (ASSEMBLER_DIALECT == ASM_ATT)
6946 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6948 /* Intel syntax. For absolute addresses, registers should not
6949 be surrounded by braces. */
6950 if (GET_CODE (x) != REG)
6953 PRINT_OPERAND (file, x, 0);
6961 PRINT_OPERAND (file, x, 0);
6966 if (ASSEMBLER_DIALECT == ASM_ATT)
6971 if (ASSEMBLER_DIALECT == ASM_ATT)
6976 if (ASSEMBLER_DIALECT == ASM_ATT)
6981 if (ASSEMBLER_DIALECT == ASM_ATT)
6986 if (ASSEMBLER_DIALECT == ASM_ATT)
6991 if (ASSEMBLER_DIALECT == ASM_ATT)
6996 /* 387 opcodes don't get size suffixes if the operands are
6998 if (STACK_REG_P (x))
7001 /* Likewise if using Intel opcodes. */
7002 if (ASSEMBLER_DIALECT == ASM_INTEL)
7005 /* This is the size of op from size of operand. */
7006 switch (GET_MODE_SIZE (GET_MODE (x)))
7009 #ifdef HAVE_GAS_FILDS_FISTS
7015 if (GET_MODE (x) == SFmode)
7030 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7032 #ifdef GAS_MNEMONICS
7058 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7060 PRINT_OPERAND (file, x, 0);
7066 /* Little bit of braindamage here. The SSE compare instructions
7067 does use completely different names for the comparisons that the
7068 fp conditional moves. */
7069 switch (GET_CODE (x))
7084 fputs ("unord", file);
7088 fputs ("neq", file);
7092 fputs ("nlt", file);
7096 fputs ("nle", file);
7099 fputs ("ord", file);
7107 #ifdef CMOV_SUN_AS_SYNTAX
7108 if (ASSEMBLER_DIALECT == ASM_ATT)
7110 switch (GET_MODE (x))
7112 case HImode: putc ('w', file); break;
7114 case SFmode: putc ('l', file); break;
7116 case DFmode: putc ('q', file); break;
7124 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7127 #ifdef CMOV_SUN_AS_SYNTAX
7128 if (ASSEMBLER_DIALECT == ASM_ATT)
7131 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7134 /* Like above, but reverse condition */
7136 /* Check to see if argument to %c is really a constant
7137 and not a condition code which needs to be reversed. */
7138 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7140 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7143 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7146 #ifdef CMOV_SUN_AS_SYNTAX
7147 if (ASSEMBLER_DIALECT == ASM_ATT)
7150 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7156 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7159 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7162 int pred_val = INTVAL (XEXP (x, 0));
7164 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7165 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7167 int taken = pred_val > REG_BR_PROB_BASE / 2;
7168 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7170 /* Emit hints only in the case default branch prediction
7171 heuristics would fail. */
7172 if (taken != cputaken)
7174 /* We use 3e (DS) prefix for taken branches and
7175 2e (CS) prefix for not taken branches. */
7177 fputs ("ds ; ", file);
7179 fputs ("cs ; ", file);
7186 output_operand_lossage ("invalid operand code `%c'", code);
7190 if (GET_CODE (x) == REG)
7192 PRINT_REG (x, code, file);
7195 else if (GET_CODE (x) == MEM)
7197 /* No `byte ptr' prefix for call instructions. */
7198 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7201 switch (GET_MODE_SIZE (GET_MODE (x)))
7203 case 1: size = "BYTE"; break;
7204 case 2: size = "WORD"; break;
7205 case 4: size = "DWORD"; break;
7206 case 8: size = "QWORD"; break;
7207 case 12: size = "XWORD"; break;
7208 case 16: size = "XMMWORD"; break;
7213 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7216 else if (code == 'w')
7218 else if (code == 'k')
7222 fputs (" PTR ", file);
7226 if (flag_pic && CONSTANT_ADDRESS_P (x))
7227 output_pic_addr_const (file, x, code);
7228 /* Avoid (%rip) for call operands. */
7229 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7230 && GET_CODE (x) != CONST_INT)
7231 output_addr_const (file, x);
7232 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7233 output_operand_lossage ("invalid constraints for operand");
7238 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7243 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7244 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7246 if (ASSEMBLER_DIALECT == ASM_ATT)
7248 fprintf (file, "0x%lx", l);
7251 /* These float cases don't actually occur as immediate operands. */
7252 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7256 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7257 fprintf (file, "%s", dstr);
7260 else if (GET_CODE (x) == CONST_DOUBLE
7261 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7265 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7266 fprintf (file, "%s", dstr);
7273 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7275 if (ASSEMBLER_DIALECT == ASM_ATT)
7278 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7279 || GET_CODE (x) == LABEL_REF)
7281 if (ASSEMBLER_DIALECT == ASM_ATT)
7284 fputs ("OFFSET FLAT:", file);
7287 if (GET_CODE (x) == CONST_INT)
7288 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7290 output_pic_addr_const (file, x, code);
7292 output_addr_const (file, x);
7296 /* Print a memory operand whose address is ADDR. */
7299 print_operand_address (file, addr)
7303 struct ix86_address parts;
7304 rtx base, index, disp;
7307 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7309 if (ASSEMBLER_DIALECT == ASM_INTEL)
7310 fputs ("DWORD PTR ", file);
7311 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7314 fputs ("fs:0", file);
7316 fputs ("gs:0", file);
7320 if (! ix86_decompose_address (addr, &parts))
7324 index = parts.index;
7326 scale = parts.scale;
7328 if (!base && !index)
7330 /* Displacement only requires special attention. */
7332 if (GET_CODE (disp) == CONST_INT)
7334 if (ASSEMBLER_DIALECT == ASM_INTEL)
7336 if (USER_LABEL_PREFIX[0] == 0)
7338 fputs ("ds:", file);
7340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7343 output_pic_addr_const (file, addr, 0);
7345 output_addr_const (file, addr);
7347 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7349 && ((GET_CODE (addr) == SYMBOL_REF
7350 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7351 || GET_CODE (addr) == LABEL_REF
7352 || (GET_CODE (addr) == CONST
7353 && GET_CODE (XEXP (addr, 0)) == PLUS
7354 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7355 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7356 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7357 fputs ("(%rip)", file);
7361 if (ASSEMBLER_DIALECT == ASM_ATT)
7366 output_pic_addr_const (file, disp, 0);
7367 else if (GET_CODE (disp) == LABEL_REF)
7368 output_asm_label (disp);
7370 output_addr_const (file, disp);
7375 PRINT_REG (base, 0, file);
7379 PRINT_REG (index, 0, file);
7381 fprintf (file, ",%d", scale);
7387 rtx offset = NULL_RTX;
7391 /* Pull out the offset of a symbol; print any symbol itself. */
7392 if (GET_CODE (disp) == CONST
7393 && GET_CODE (XEXP (disp, 0)) == PLUS
7394 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7396 offset = XEXP (XEXP (disp, 0), 1);
7397 disp = gen_rtx_CONST (VOIDmode,
7398 XEXP (XEXP (disp, 0), 0));
7402 output_pic_addr_const (file, disp, 0);
7403 else if (GET_CODE (disp) == LABEL_REF)
7404 output_asm_label (disp);
7405 else if (GET_CODE (disp) == CONST_INT)
7408 output_addr_const (file, disp);
7414 PRINT_REG (base, 0, file);
7417 if (INTVAL (offset) >= 0)
7419 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7423 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7430 PRINT_REG (index, 0, file);
7432 fprintf (file, "*%d", scale);
7440 output_addr_const_extra (file, x)
7446 if (GET_CODE (x) != UNSPEC)
7449 op = XVECEXP (x, 0, 0);
7450 switch (XINT (x, 1))
7452 case UNSPEC_GOTTPOFF:
7453 output_addr_const (file, op);
7454 /* FIXME: This might be @TPOFF in Sun ld. */
7455 fputs ("@GOTTPOFF", file);
7458 output_addr_const (file, op);
7459 fputs ("@TPOFF", file);
7462 output_addr_const (file, op);
7464 fputs ("@TPOFF", file);
7466 fputs ("@NTPOFF", file);
7469 output_addr_const (file, op);
7470 fputs ("@DTPOFF", file);
7472 case UNSPEC_GOTNTPOFF:
7473 output_addr_const (file, op);
7475 fputs ("@GOTTPOFF(%rip)", file);
7477 fputs ("@GOTNTPOFF", file);
7479 case UNSPEC_INDNTPOFF:
7480 output_addr_const (file, op);
7481 fputs ("@INDNTPOFF", file);
7491 /* Split one or more DImode RTL references into pairs of SImode
7492 references. The RTL can be REG, offsettable MEM, integer constant, or
7493 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7494 split and "num" is its length. lo_half and hi_half are output arrays
7495 that parallel "operands". */
7498 split_di (operands, num, lo_half, hi_half)
7501 rtx lo_half[], hi_half[];
7505 rtx op = operands[num];
7507 /* simplify_subreg refuse to split volatile memory addresses,
7508 but we still have to handle it. */
7509 if (GET_CODE (op) == MEM)
7511 lo_half[num] = adjust_address (op, SImode, 0);
7512 hi_half[num] = adjust_address (op, SImode, 4);
7516 lo_half[num] = simplify_gen_subreg (SImode, op,
7517 GET_MODE (op) == VOIDmode
7518 ? DImode : GET_MODE (op), 0);
7519 hi_half[num] = simplify_gen_subreg (SImode, op,
7520 GET_MODE (op) == VOIDmode
7521 ? DImode : GET_MODE (op), 4);
7525 /* Split one or more TImode RTL references into pairs of SImode
7526 references. The RTL can be REG, offsettable MEM, integer constant, or
7527 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7528 split and "num" is its length. lo_half and hi_half are output arrays
7529 that parallel "operands". */
7532 split_ti (operands, num, lo_half, hi_half)
7535 rtx lo_half[], hi_half[];
7539 rtx op = operands[num];
7541 /* simplify_subreg refuse to split volatile memory addresses, but we
7542 still have to handle it. */
7543 if (GET_CODE (op) == MEM)
7545 lo_half[num] = adjust_address (op, DImode, 0);
7546 hi_half[num] = adjust_address (op, DImode, 8);
7550 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7551 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7556 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7557 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7558 is the expression of the binary operation. The output may either be
7559 emitted here, or returned to the caller, like all output_* functions.
7561 There is no guarantee that the operands are the same mode, as they
7562 might be within FLOAT or FLOAT_EXTEND expressions. */
7564 #ifndef SYSV386_COMPAT
7565 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7566 wants to fix the assemblers because that causes incompatibility
7567 with gcc. No-one wants to fix gcc because that causes
7568 incompatibility with assemblers... You can use the option of
7569 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7570 #define SYSV386_COMPAT 1
7574 output_387_binary_op (insn, operands)
7578 static char buf[30];
7581 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7583 #ifdef ENABLE_CHECKING
7584 /* Even if we do not want to check the inputs, this documents input
7585 constraints. Which helps in understanding the following code. */
7586 if (STACK_REG_P (operands[0])
7587 && ((REG_P (operands[1])
7588 && REGNO (operands[0]) == REGNO (operands[1])
7589 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7590 || (REG_P (operands[2])
7591 && REGNO (operands[0]) == REGNO (operands[2])
7592 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7593 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7599 switch (GET_CODE (operands[3]))
7602 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7603 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7611 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7612 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7620 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7621 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7629 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7630 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7644 if (GET_MODE (operands[0]) == SFmode)
7645 strcat (buf, "ss\t{%2, %0|%0, %2}");
7647 strcat (buf, "sd\t{%2, %0|%0, %2}");
7652 switch (GET_CODE (operands[3]))
7656 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7658 rtx temp = operands[2];
7659 operands[2] = operands[1];
7663 /* know operands[0] == operands[1]. */
7665 if (GET_CODE (operands[2]) == MEM)
7671 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7673 if (STACK_TOP_P (operands[0]))
7674 /* How is it that we are storing to a dead operand[2]?
7675 Well, presumably operands[1] is dead too. We can't
7676 store the result to st(0) as st(0) gets popped on this
7677 instruction. Instead store to operands[2] (which I
7678 think has to be st(1)). st(1) will be popped later.
7679 gcc <= 2.8.1 didn't have this check and generated
7680 assembly code that the Unixware assembler rejected. */
7681 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7683 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7687 if (STACK_TOP_P (operands[0]))
7688 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7690 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7695 if (GET_CODE (operands[1]) == MEM)
7701 if (GET_CODE (operands[2]) == MEM)
7707 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7710 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7711 derived assemblers, confusingly reverse the direction of
7712 the operation for fsub{r} and fdiv{r} when the
7713 destination register is not st(0). The Intel assembler
7714 doesn't have this brain damage. Read !SYSV386_COMPAT to
7715 figure out what the hardware really does. */
7716 if (STACK_TOP_P (operands[0]))
7717 p = "{p\t%0, %2|rp\t%2, %0}";
7719 p = "{rp\t%2, %0|p\t%0, %2}";
7721 if (STACK_TOP_P (operands[0]))
7722 /* As above for fmul/fadd, we can't store to st(0). */
7723 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7725 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7730 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7733 if (STACK_TOP_P (operands[0]))
7734 p = "{rp\t%0, %1|p\t%1, %0}";
7736 p = "{p\t%1, %0|rp\t%0, %1}";
7738 if (STACK_TOP_P (operands[0]))
7739 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7741 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7746 if (STACK_TOP_P (operands[0]))
7748 if (STACK_TOP_P (operands[1]))
7749 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7751 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7754 else if (STACK_TOP_P (operands[1]))
7757 p = "{\t%1, %0|r\t%0, %1}";
7759 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7765 p = "{r\t%2, %0|\t%0, %2}";
7767 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7780 /* Output code to initialize control word copies used by
7781 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7782 is set to control word rounding downwards. */
7784 emit_i387_cw_initialization (normal, round_down)
7785 rtx normal, round_down;
7787 rtx reg = gen_reg_rtx (HImode);
7789 emit_insn (gen_x86_fnstcw_1 (normal));
7790 emit_move_insn (reg, normal);
7791 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7793 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7795 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7796 emit_move_insn (round_down, reg);
7799 /* Output code for INSN to convert a float to a signed int. OPERANDS
7800 are the insn operands. The output may be [HSD]Imode and the input
7801 operand may be [SDX]Fmode. */
7804 output_fix_trunc (insn, operands)
7808 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7809 int dimode_p = GET_MODE (operands[0]) == DImode;
7811 /* Jump through a hoop or two for DImode, since the hardware has no
7812 non-popping instruction. We used to do this a different way, but
7813 that was somewhat fragile and broke with post-reload splitters. */
7814 if (dimode_p && !stack_top_dies)
7815 output_asm_insn ("fld\t%y1", operands);
7817 if (!STACK_TOP_P (operands[1]))
7820 if (GET_CODE (operands[0]) != MEM)
7823 output_asm_insn ("fldcw\t%3", operands);
7824 if (stack_top_dies || dimode_p)
7825 output_asm_insn ("fistp%z0\t%0", operands);
7827 output_asm_insn ("fist%z0\t%0", operands);
7828 output_asm_insn ("fldcw\t%2", operands);
7833 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7834 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7835 when fucom should be used. */
7838 output_fp_compare (insn, operands, eflags_p, unordered_p)
7841 int eflags_p, unordered_p;
7844 rtx cmp_op0 = operands[0];
7845 rtx cmp_op1 = operands[1];
7846 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7851 cmp_op1 = operands[2];
7855 if (GET_MODE (operands[0]) == SFmode)
7857 return "ucomiss\t{%1, %0|%0, %1}";
7859 return "comiss\t{%1, %0|%0, %1}";
7862 return "ucomisd\t{%1, %0|%0, %1}";
7864 return "comisd\t{%1, %0|%0, %1}";
7867 if (! STACK_TOP_P (cmp_op0))
7870 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7872 if (STACK_REG_P (cmp_op1)
7874 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7875 && REGNO (cmp_op1) != FIRST_STACK_REG)
7877 /* If both the top of the 387 stack dies, and the other operand
7878 is also a stack register that dies, then this must be a
7879 `fcompp' float compare */
7883 /* There is no double popping fcomi variant. Fortunately,
7884 eflags is immune from the fstp's cc clobbering. */
7886 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7888 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7896 return "fucompp\n\tfnstsw\t%0";
7898 return "fcompp\n\tfnstsw\t%0";
7911 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7913 static const char * const alt[24] =
7925 "fcomi\t{%y1, %0|%0, %y1}",
7926 "fcomip\t{%y1, %0|%0, %y1}",
7927 "fucomi\t{%y1, %0|%0, %y1}",
7928 "fucomip\t{%y1, %0|%0, %y1}",
7935 "fcom%z2\t%y2\n\tfnstsw\t%0",
7936 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7937 "fucom%z2\t%y2\n\tfnstsw\t%0",
7938 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7940 "ficom%z2\t%y2\n\tfnstsw\t%0",
7941 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7949 mask = eflags_p << 3;
7950 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7951 mask |= unordered_p << 1;
7952 mask |= stack_top_dies;
7965 ix86_output_addr_vec_elt (file, value)
7969 const char *directive = ASM_LONG;
7974 directive = ASM_QUAD;
7980 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7984 ix86_output_addr_diff_elt (file, value, rel)
7989 fprintf (file, "%s%s%d-%s%d\n",
7990 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7991 else if (HAVE_AS_GOTOFF_IN_DATA)
7992 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7994 else if (TARGET_MACHO)
7995 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7996 machopic_function_base_name () + 1);
7999 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8000 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8003 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8007 ix86_expand_clear (dest)
8012 /* We play register width games, which are only valid after reload. */
8013 if (!reload_completed)
8016 /* Avoid HImode and its attendant prefix byte. */
8017 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8018 dest = gen_rtx_REG (SImode, REGNO (dest));
8020 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8022 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8023 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8025 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8026 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8032 /* X is an unchanging MEM. If it is a constant pool reference, return
8033 the constant pool rtx, else NULL. */
8036 maybe_get_pool_constant (x)
8039 x = i386_simplify_dwarf_addr (XEXP (x, 0));
8041 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8042 return get_pool_constant (x);
8048 ix86_expand_move (mode, operands)
8049 enum machine_mode mode;
8052 int strict = (reload_in_progress || reload_completed);
8053 rtx insn, op0, op1, tmp;
8058 if (tls_symbolic_operand (op1, Pmode))
8060 op1 = legitimize_address (op1, op1, VOIDmode);
8061 if (GET_CODE (op0) == MEM)
8063 tmp = gen_reg_rtx (mode);
8064 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8068 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8073 rtx temp = ((reload_in_progress
8074 || ((op0 && GET_CODE (op0) == REG)
8076 ? op0 : gen_reg_rtx (Pmode));
8077 op1 = machopic_indirect_data_reference (op1, temp);
8078 op1 = machopic_legitimize_pic_address (op1, mode,
8079 temp == op1 ? 0 : temp);
8083 if (MACHOPIC_INDIRECT)
8084 op1 = machopic_indirect_data_reference (op1, 0);
8088 insn = gen_rtx_SET (VOIDmode, op0, op1);
8092 #endif /* TARGET_MACHO */
8093 if (GET_CODE (op0) == MEM)
8094 op1 = force_reg (Pmode, op1);
8098 if (GET_CODE (temp) != REG)
8099 temp = gen_reg_rtx (Pmode);
8100 temp = legitimize_pic_address (op1, temp);
8108 if (GET_CODE (op0) == MEM
8109 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8110 || !push_operand (op0, mode))
8111 && GET_CODE (op1) == MEM)
8112 op1 = force_reg (mode, op1);
8114 if (push_operand (op0, mode)
8115 && ! general_no_elim_operand (op1, mode))
8116 op1 = copy_to_mode_reg (mode, op1);
8118 /* Force large constants in 64bit compilation into register
8119 to get them CSEed. */
8120 if (TARGET_64BIT && mode == DImode
8121 && immediate_operand (op1, mode)
8122 && !x86_64_zero_extended_value (op1)
8123 && !register_operand (op0, mode)
8124 && optimize && !reload_completed && !reload_in_progress)
8125 op1 = copy_to_mode_reg (mode, op1);
8127 if (FLOAT_MODE_P (mode))
8129 /* If we are loading a floating point constant to a register,
8130 force the value to memory now, since we'll get better code
8131 out the back end. */
8135 else if (GET_CODE (op1) == CONST_DOUBLE
8136 && register_operand (op0, mode))
8137 op1 = validize_mem (force_const_mem (mode, op1));
8141 insn = gen_rtx_SET (VOIDmode, op0, op1);
8147 ix86_expand_vector_move (mode, operands)
8148 enum machine_mode mode;
8151 /* Force constants other than zero into memory. We do not know how
8152 the instructions used to build constants modify the upper 64 bits
8153 of the register, once we have that information we may be able
8154 to handle some of them more efficiently. */
8155 if ((reload_in_progress | reload_completed) == 0
8156 && register_operand (operands[0], mode)
8157 && CONSTANT_P (operands[1]))
8158 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8160 /* Make operand1 a register if it isn't already. */
8162 && !register_operand (operands[0], mode)
8163 && !register_operand (operands[1], mode))
8165 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8166 emit_move_insn (operands[0], temp);
8170 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8173 /* Attempt to expand a binary operator. Make the expansion closer to the
8174 actual machine, then just general_operand, which will allow 3 separate
8175 memory references (one output, two input) in a single insn. */
8178 ix86_expand_binary_operator (code, mode, operands)
8180 enum machine_mode mode;
8183 int matching_memory;
8184 rtx src1, src2, dst, op, clob;
8190 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8191 if (GET_RTX_CLASS (code) == 'c'
8192 && (rtx_equal_p (dst, src2)
8193 || immediate_operand (src1, mode)))
8200 /* If the destination is memory, and we do not have matching source
8201 operands, do things in registers. */
8202 matching_memory = 0;
8203 if (GET_CODE (dst) == MEM)
8205 if (rtx_equal_p (dst, src1))
8206 matching_memory = 1;
8207 else if (GET_RTX_CLASS (code) == 'c'
8208 && rtx_equal_p (dst, src2))
8209 matching_memory = 2;
8211 dst = gen_reg_rtx (mode);
8214 /* Both source operands cannot be in memory. */
8215 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8217 if (matching_memory != 2)
8218 src2 = force_reg (mode, src2);
8220 src1 = force_reg (mode, src1);
8223 /* If the operation is not commutable, source 1 cannot be a constant
8224 or non-matching memory. */
8225 if ((CONSTANT_P (src1)
8226 || (!matching_memory && GET_CODE (src1) == MEM))
8227 && GET_RTX_CLASS (code) != 'c')
8228 src1 = force_reg (mode, src1);
8230 /* If optimizing, copy to regs to improve CSE */
8231 if (optimize && ! no_new_pseudos)
8233 if (GET_CODE (dst) == MEM)
8234 dst = gen_reg_rtx (mode);
8235 if (GET_CODE (src1) == MEM)
8236 src1 = force_reg (mode, src1);
8237 if (GET_CODE (src2) == MEM)
8238 src2 = force_reg (mode, src2);
8241 /* Emit the instruction. */
8243 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8244 if (reload_in_progress)
8246 /* Reload doesn't know about the flags register, and doesn't know that
8247 it doesn't want to clobber it. We can only do this with PLUS. */
8254 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8255 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8258 /* Fix up the destination if needed. */
8259 if (dst != operands[0])
8260 emit_move_insn (operands[0], dst);
8263 /* Return TRUE or FALSE depending on whether the binary operator meets the
8264 appropriate constraints. */
8267 ix86_binary_operator_ok (code, mode, operands)
8269 enum machine_mode mode ATTRIBUTE_UNUSED;
8272 /* Both source operands cannot be in memory. */
8273 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8275 /* If the operation is not commutable, source 1 cannot be a constant. */
8276 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8278 /* If the destination is memory, we must have a matching source operand. */
8279 if (GET_CODE (operands[0]) == MEM
8280 && ! (rtx_equal_p (operands[0], operands[1])
8281 || (GET_RTX_CLASS (code) == 'c'
8282 && rtx_equal_p (operands[0], operands[2]))))
8284 /* If the operation is not commutable and the source 1 is memory, we must
8285 have a matching destination. */
8286 if (GET_CODE (operands[1]) == MEM
8287 && GET_RTX_CLASS (code) != 'c'
8288 && ! rtx_equal_p (operands[0], operands[1]))
8293 /* Attempt to expand a unary operator. Make the expansion closer to the
8294 actual machine, then just general_operand, which will allow 2 separate
8295 memory references (one output, one input) in a single insn. */
8298 ix86_expand_unary_operator (code, mode, operands)
8300 enum machine_mode mode;
8303 int matching_memory;
8304 rtx src, dst, op, clob;
8309 /* If the destination is memory, and we do not have matching source
8310 operands, do things in registers. */
8311 matching_memory = 0;
8312 if (GET_CODE (dst) == MEM)
8314 if (rtx_equal_p (dst, src))
8315 matching_memory = 1;
8317 dst = gen_reg_rtx (mode);
8320 /* When source operand is memory, destination must match. */
8321 if (!matching_memory && GET_CODE (src) == MEM)
8322 src = force_reg (mode, src);
8324 /* If optimizing, copy to regs to improve CSE */
8325 if (optimize && ! no_new_pseudos)
8327 if (GET_CODE (dst) == MEM)
8328 dst = gen_reg_rtx (mode);
8329 if (GET_CODE (src) == MEM)
8330 src = force_reg (mode, src);
8333 /* Emit the instruction. */
8335 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8336 if (reload_in_progress || code == NOT)
8338 /* Reload doesn't know about the flags register, and doesn't know that
8339 it doesn't want to clobber it. */
8346 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8347 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8350 /* Fix up the destination if needed. */
8351 if (dst != operands[0])
8352 emit_move_insn (operands[0], dst);
8355 /* Return TRUE or FALSE depending on whether the unary operator meets the
8356 appropriate constraints. */
8359 ix86_unary_operator_ok (code, mode, operands)
8360 enum rtx_code code ATTRIBUTE_UNUSED;
8361 enum machine_mode mode ATTRIBUTE_UNUSED;
8362 rtx operands[2] ATTRIBUTE_UNUSED;
8364 /* If one of operands is memory, source and destination must match. */
8365 if ((GET_CODE (operands[0]) == MEM
8366 || GET_CODE (operands[1]) == MEM)
8367 && ! rtx_equal_p (operands[0], operands[1]))
8372 /* Return TRUE or FALSE depending on whether the first SET in INSN
8373 has source and destination with matching CC modes, and that the
8374 CC mode is at least as constrained as REQ_MODE. */
8377 ix86_match_ccmode (insn, req_mode)
8379 enum machine_mode req_mode;
8382 enum machine_mode set_mode;
8384 set = PATTERN (insn);
8385 if (GET_CODE (set) == PARALLEL)
8386 set = XVECEXP (set, 0, 0);
8387 if (GET_CODE (set) != SET)
8389 if (GET_CODE (SET_SRC (set)) != COMPARE)
8392 set_mode = GET_MODE (SET_DEST (set));
8396 if (req_mode != CCNOmode
8397 && (req_mode != CCmode
8398 || XEXP (SET_SRC (set), 1) != const0_rtx))
8402 if (req_mode == CCGCmode)
8406 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8410 if (req_mode == CCZmode)
8420 return (GET_MODE (SET_SRC (set)) == set_mode);
8423 /* Generate insn patterns to do an integer compare of OPERANDS. */
8426 ix86_expand_int_compare (code, op0, op1)
8430 enum machine_mode cmpmode;
8433 cmpmode = SELECT_CC_MODE (code, op0, op1);
8434 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8436 /* This is very simple, but making the interface the same as in the
8437 FP case makes the rest of the code easier. */
8438 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8439 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8441 /* Return the test that should be put into the flags user, i.e.
8442 the bcc, scc, or cmov instruction. */
8443 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8446 /* Figure out whether to use ordered or unordered fp comparisons.
8447 Return the appropriate mode to use. */
8450 ix86_fp_compare_mode (code)
8451 enum rtx_code code ATTRIBUTE_UNUSED;
8453 /* ??? In order to make all comparisons reversible, we do all comparisons
8454 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8455 all forms trapping and nontrapping comparisons, we can make inequality
8456 comparisons trapping again, since it results in better code when using
8457 FCOM based compares. */
8458 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8462 ix86_cc_mode (code, op0, op1)
8466 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8467 return ix86_fp_compare_mode (code);
8470 /* Only zero flag is needed. */
8472 case NE: /* ZF!=0 */
8474 /* Codes needing carry flag. */
8475 case GEU: /* CF=0 */
8476 case GTU: /* CF=0 & ZF=0 */
8477 case LTU: /* CF=1 */
8478 case LEU: /* CF=1 | ZF=1 */
8480 /* Codes possibly doable only with sign flag when
8481 comparing against zero. */
8482 case GE: /* SF=OF or SF=0 */
8483 case LT: /* SF<>OF or SF=1 */
8484 if (op1 == const0_rtx)
8487 /* For other cases Carry flag is not required. */
8489 /* Codes doable only with sign flag when comparing
8490 against zero, but we miss jump instruction for it
8491 so we need to use relational tests against overflow
8492 that thus needs to be zero. */
8493 case GT: /* ZF=0 & SF=OF */
8494 case LE: /* ZF=1 | SF<>OF */
8495 if (op1 == const0_rtx)
8499 /* strcmp pattern do (use flags) and combine may ask us for proper
8508 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8511 ix86_use_fcomi_compare (code)
8512 enum rtx_code code ATTRIBUTE_UNUSED;
8514 enum rtx_code swapped_code = swap_condition (code);
8515 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8516 || (ix86_fp_comparison_cost (swapped_code)
8517 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8520 /* Swap, force into registers, or otherwise massage the two operands
8521 to a fp comparison. The operands are updated in place; the new
8522 comparison code is returned. */
8524 static enum rtx_code
8525 ix86_prepare_fp_compare_args (code, pop0, pop1)
8529 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8530 rtx op0 = *pop0, op1 = *pop1;
8531 enum machine_mode op_mode = GET_MODE (op0);
8532 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8534 /* All of the unordered compare instructions only work on registers.
8535 The same is true of the XFmode compare instructions. The same is
8536 true of the fcomi compare instructions. */
8539 && (fpcmp_mode == CCFPUmode
8540 || op_mode == XFmode
8541 || op_mode == TFmode
8542 || ix86_use_fcomi_compare (code)))
8544 op0 = force_reg (op_mode, op0);
8545 op1 = force_reg (op_mode, op1);
8549 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8550 things around if they appear profitable, otherwise force op0
8553 if (standard_80387_constant_p (op0) == 0
8554 || (GET_CODE (op0) == MEM
8555 && ! (standard_80387_constant_p (op1) == 0
8556 || GET_CODE (op1) == MEM)))
8559 tmp = op0, op0 = op1, op1 = tmp;
8560 code = swap_condition (code);
8563 if (GET_CODE (op0) != REG)
8564 op0 = force_reg (op_mode, op0);
8566 if (CONSTANT_P (op1))
8568 if (standard_80387_constant_p (op1))
8569 op1 = force_reg (op_mode, op1);
8571 op1 = validize_mem (force_const_mem (op_mode, op1));
8575 /* Try to rearrange the comparison to make it cheaper. */
8576 if (ix86_fp_comparison_cost (code)
8577 > ix86_fp_comparison_cost (swap_condition (code))
8578 && (GET_CODE (op1) == REG || !no_new_pseudos))
8581 tmp = op0, op0 = op1, op1 = tmp;
8582 code = swap_condition (code);
8583 if (GET_CODE (op0) != REG)
8584 op0 = force_reg (op_mode, op0);
8592 /* Convert comparison codes we use to represent FP comparison to integer
8593 code that will result in proper branch. Return UNKNOWN if no such code
8595 static enum rtx_code
8596 ix86_fp_compare_code_to_integer (code)
8626 /* Split comparison code CODE into comparisons we can do using branch
8627 instructions. BYPASS_CODE is comparison code for branch that will
8628 branch around FIRST_CODE and SECOND_CODE. If some of branches
8629 is not required, set value to NIL.
8630 We never require more than two branches. */
8632 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8633 enum rtx_code code, *bypass_code, *first_code, *second_code;
8639 /* The fcomi comparison sets flags as follows:
8649 case GT: /* GTU - CF=0 & ZF=0 */
8650 case GE: /* GEU - CF=0 */
8651 case ORDERED: /* PF=0 */
8652 case UNORDERED: /* PF=1 */
8653 case UNEQ: /* EQ - ZF=1 */
8654 case UNLT: /* LTU - CF=1 */
8655 case UNLE: /* LEU - CF=1 | ZF=1 */
8656 case LTGT: /* EQ - ZF=0 */
8658 case LT: /* LTU - CF=1 - fails on unordered */
8660 *bypass_code = UNORDERED;
8662 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8664 *bypass_code = UNORDERED;
8666 case EQ: /* EQ - ZF=1 - fails on unordered */
8668 *bypass_code = UNORDERED;
8670 case NE: /* NE - ZF=0 - fails on unordered */
8672 *second_code = UNORDERED;
8674 case UNGE: /* GEU - CF=0 - fails on unordered */
8676 *second_code = UNORDERED;
8678 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8680 *second_code = UNORDERED;
8685 if (!TARGET_IEEE_FP)
8692 /* Return cost of comparison done fcom + arithmetics operations on AX.
8693 All following functions do use number of instructions as a cost metrics.
8694 In future this should be tweaked to compute bytes for optimize_size and
8695 take into account performance of various instructions on various CPUs. */
8697 ix86_fp_comparison_arithmetics_cost (code)
8700 if (!TARGET_IEEE_FP)
8702 /* The cost of code output by ix86_expand_fp_compare. */
8730 /* Return cost of comparison done using fcomi operation.
8731 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8733 ix86_fp_comparison_fcomi_cost (code)
8736 enum rtx_code bypass_code, first_code, second_code;
8737 /* Return arbitrarily high cost when instruction is not supported - this
8738 prevents gcc from using it. */
8741 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8742 return (bypass_code != NIL || second_code != NIL) + 2;
8745 /* Return cost of comparison done using sahf operation.
8746 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8748 ix86_fp_comparison_sahf_cost (code)
8751 enum rtx_code bypass_code, first_code, second_code;
8752 /* Return arbitrarily high cost when instruction is not preferred - this
8753 avoids gcc from using it. */
8754 if (!TARGET_USE_SAHF && !optimize_size)
8756 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8757 return (bypass_code != NIL || second_code != NIL) + 3;
8760 /* Compute cost of the comparison done using any method.
8761 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8763 ix86_fp_comparison_cost (code)
8766 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8769 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8770 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8772 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8773 if (min > sahf_cost)
8775 if (min > fcomi_cost)
8780 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8783 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8785 rtx op0, op1, scratch;
8789 enum machine_mode fpcmp_mode, intcmp_mode;
8791 int cost = ix86_fp_comparison_cost (code);
8792 enum rtx_code bypass_code, first_code, second_code;
8794 fpcmp_mode = ix86_fp_compare_mode (code);
8795 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8798 *second_test = NULL_RTX;
8800 *bypass_test = NULL_RTX;
8802 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8804 /* Do fcomi/sahf based test when profitable. */
8805 if ((bypass_code == NIL || bypass_test)
8806 && (second_code == NIL || second_test)
8807 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8811 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8812 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8818 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8819 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8821 scratch = gen_reg_rtx (HImode);
8822 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8823 emit_insn (gen_x86_sahf_1 (scratch));
8826 /* The FP codes work out to act like unsigned. */
8827 intcmp_mode = fpcmp_mode;
8829 if (bypass_code != NIL)
8830 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8831 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8833 if (second_code != NIL)
8834 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8835 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8840 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8841 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8842 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8844 scratch = gen_reg_rtx (HImode);
8845 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8847 /* In the unordered case, we have to check C2 for NaN's, which
8848 doesn't happen to work out to anything nice combination-wise.
8849 So do some bit twiddling on the value we've got in AH to come
8850 up with an appropriate set of condition codes. */
8852 intcmp_mode = CCNOmode;
8857 if (code == GT || !TARGET_IEEE_FP)
8859 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8864 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8865 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8866 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8867 intcmp_mode = CCmode;
8873 if (code == LT && TARGET_IEEE_FP)
8875 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8876 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8877 intcmp_mode = CCmode;
8882 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8888 if (code == GE || !TARGET_IEEE_FP)
8890 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8895 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8896 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8903 if (code == LE && TARGET_IEEE_FP)
8905 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8906 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8907 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8908 intcmp_mode = CCmode;
8913 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8919 if (code == EQ && TARGET_IEEE_FP)
8921 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8922 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8923 intcmp_mode = CCmode;
8928 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8935 if (code == NE && TARGET_IEEE_FP)
8937 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8938 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8944 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8950 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8954 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8963 /* Return the test that should be put into the flags user, i.e.
8964 the bcc, scc, or cmov instruction. */
8965 return gen_rtx_fmt_ee (code, VOIDmode,
8966 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8971 ix86_expand_compare (code, second_test, bypass_test)
8973 rtx *second_test, *bypass_test;
8976 op0 = ix86_compare_op0;
8977 op1 = ix86_compare_op1;
8980 *second_test = NULL_RTX;
8982 *bypass_test = NULL_RTX;
8984 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8985 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8986 second_test, bypass_test);
8988 ret = ix86_expand_int_compare (code, op0, op1);
8993 /* Return true if the CODE will result in nontrivial jump sequence. */
8995 ix86_fp_jump_nontrivial_p (code)
8998 enum rtx_code bypass_code, first_code, second_code;
9001 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9002 return bypass_code != NIL || second_code != NIL;
9006 ix86_expand_branch (code, label)
9012 switch (GET_MODE (ix86_compare_op0))
9018 tmp = ix86_expand_compare (code, NULL, NULL);
9019 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9020 gen_rtx_LABEL_REF (VOIDmode, label),
9022 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9032 enum rtx_code bypass_code, first_code, second_code;
9034 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9037 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9039 /* Check whether we will use the natural sequence with one jump. If
9040 so, we can expand jump early. Otherwise delay expansion by
9041 creating compound insn to not confuse optimizers. */
9042 if (bypass_code == NIL && second_code == NIL
9045 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9046 gen_rtx_LABEL_REF (VOIDmode, label),
9051 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9052 ix86_compare_op0, ix86_compare_op1);
9053 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9054 gen_rtx_LABEL_REF (VOIDmode, label),
9056 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9058 use_fcomi = ix86_use_fcomi_compare (code);
9059 vec = rtvec_alloc (3 + !use_fcomi);
9060 RTVEC_ELT (vec, 0) = tmp;
9062 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9064 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9067 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9069 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9077 /* Expand DImode branch into multiple compare+branch. */
9079 rtx lo[2], hi[2], label2;
9080 enum rtx_code code1, code2, code3;
9082 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9084 tmp = ix86_compare_op0;
9085 ix86_compare_op0 = ix86_compare_op1;
9086 ix86_compare_op1 = tmp;
9087 code = swap_condition (code);
9089 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9090 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9092 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9093 avoid two branches. This costs one extra insn, so disable when
9094 optimizing for size. */
9096 if ((code == EQ || code == NE)
9098 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9103 if (hi[1] != const0_rtx)
9104 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9105 NULL_RTX, 0, OPTAB_WIDEN);
9108 if (lo[1] != const0_rtx)
9109 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9110 NULL_RTX, 0, OPTAB_WIDEN);
9112 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9113 NULL_RTX, 0, OPTAB_WIDEN);
9115 ix86_compare_op0 = tmp;
9116 ix86_compare_op1 = const0_rtx;
9117 ix86_expand_branch (code, label);
9121 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9122 op1 is a constant and the low word is zero, then we can just
9123 examine the high word. */
9125 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9128 case LT: case LTU: case GE: case GEU:
9129 ix86_compare_op0 = hi[0];
9130 ix86_compare_op1 = hi[1];
9131 ix86_expand_branch (code, label);
9137 /* Otherwise, we need two or three jumps. */
9139 label2 = gen_label_rtx ();
9142 code2 = swap_condition (code);
9143 code3 = unsigned_condition (code);
9147 case LT: case GT: case LTU: case GTU:
9150 case LE: code1 = LT; code2 = GT; break;
9151 case GE: code1 = GT; code2 = LT; break;
9152 case LEU: code1 = LTU; code2 = GTU; break;
9153 case GEU: code1 = GTU; code2 = LTU; break;
9155 case EQ: code1 = NIL; code2 = NE; break;
9156 case NE: code2 = NIL; break;
9164 * if (hi(a) < hi(b)) goto true;
9165 * if (hi(a) > hi(b)) goto false;
9166 * if (lo(a) < lo(b)) goto true;
9170 ix86_compare_op0 = hi[0];
9171 ix86_compare_op1 = hi[1];
9174 ix86_expand_branch (code1, label);
9176 ix86_expand_branch (code2, label2);
9178 ix86_compare_op0 = lo[0];
9179 ix86_compare_op1 = lo[1];
9180 ix86_expand_branch (code3, label);
9183 emit_label (label2);
9192 /* Split branch based on floating point condition. */
9194 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9196 rtx op1, op2, target1, target2, tmp;
9199 rtx label = NULL_RTX;
9201 int bypass_probability = -1, second_probability = -1, probability = -1;
9204 if (target2 != pc_rtx)
9207 code = reverse_condition_maybe_unordered (code);
9212 condition = ix86_expand_fp_compare (code, op1, op2,
9213 tmp, &second, &bypass);
9215 if (split_branch_probability >= 0)
9217 /* Distribute the probabilities across the jumps.
9218 Assume the BYPASS and SECOND to be always test
9220 probability = split_branch_probability;
9222 /* Value of 1 is low enough to make no need for probability
9223 to be updated. Later we may run some experiments and see
9224 if unordered values are more frequent in practice. */
9226 bypass_probability = 1;
9228 second_probability = 1;
9230 if (bypass != NULL_RTX)
9232 label = gen_label_rtx ();
9233 i = emit_jump_insn (gen_rtx_SET
9235 gen_rtx_IF_THEN_ELSE (VOIDmode,
9237 gen_rtx_LABEL_REF (VOIDmode,
9240 if (bypass_probability >= 0)
9242 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9243 GEN_INT (bypass_probability),
9246 i = emit_jump_insn (gen_rtx_SET
9248 gen_rtx_IF_THEN_ELSE (VOIDmode,
9249 condition, target1, target2)));
9250 if (probability >= 0)
9252 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9253 GEN_INT (probability),
9255 if (second != NULL_RTX)
9257 i = emit_jump_insn (gen_rtx_SET
9259 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9261 if (second_probability >= 0)
9263 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9264 GEN_INT (second_probability),
9267 if (label != NULL_RTX)
9272 ix86_expand_setcc (code, dest)
9276 rtx ret, tmp, tmpreg;
9277 rtx second_test, bypass_test;
9279 if (GET_MODE (ix86_compare_op0) == DImode
9281 return 0; /* FAIL */
9283 if (GET_MODE (dest) != QImode)
9286 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9287 PUT_MODE (ret, QImode);
9292 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9293 if (bypass_test || second_test)
9295 rtx test = second_test;
9297 rtx tmp2 = gen_reg_rtx (QImode);
9304 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9306 PUT_MODE (test, QImode);
9307 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9310 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9312 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9315 return 1; /* DONE */
9318 /* Expand comparison setting or clearing carry flag. Return true when successful
9319 and set pop for the operation. */
9321 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9325 enum machine_mode mode =
9326 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9328 /* Do not handle DImode compares that go trought special path. Also we can't
9329 deal with FP compares yet. This is possible to add. */
9330 if ((mode == DImode && !TARGET_64BIT))
9332 if (FLOAT_MODE_P (mode))
9334 rtx second_test = NULL, bypass_test = NULL;
9335 rtx compare_op, compare_seq;
9337 /* Shortcut: following common codes never translate into carry flag compares. */
9338 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9339 || code == ORDERED || code == UNORDERED)
9342 /* These comparisons require zero flag; swap operands so they won't. */
9343 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9349 code = swap_condition (code);
9352 /* Try to expand the comparsion and verify that we end up with carry flag
9353 based comparsion. This is fails to be true only when we decide to expand
9354 comparsion using arithmetic that is not too common scenario. */
9356 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9357 &second_test, &bypass_test);
9358 compare_seq = get_insns ();
9361 if (second_test || bypass_test)
9363 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9364 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9365 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9367 code = GET_CODE (compare_op);
9368 if (code != LTU && code != GEU)
9370 emit_insn (compare_seq);
9374 if (!INTEGRAL_MODE_P (mode))
9382 /* Convert a==0 into (unsigned)a<1. */
9385 if (op1 != const0_rtx)
9388 code = (code == EQ ? LTU : GEU);
9391 /* Convert a>b into b<a or a>=b-1. */
9394 if (GET_CODE (op1) == CONST_INT)
9396 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9397 /* Bail out on overflow. We still can swap operands but that
9398 would force loading of the constant into register. */
9399 if (op1 == const0_rtx
9400 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9402 code = (code == GTU ? GEU : LTU);
9409 code = (code == GTU ? LTU : GEU);
9413 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9416 if (mode == DImode || op1 != const0_rtx)
9418 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9419 code = (code == LT ? GEU : LTU);
9423 if (mode == DImode || op1 != constm1_rtx)
9425 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9426 code = (code == LE ? GEU : LTU);
9432 ix86_compare_op0 = op0;
9433 ix86_compare_op1 = op1;
9434 *pop = ix86_expand_compare (code, NULL, NULL);
9435 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9441 ix86_expand_int_movcc (operands)
9444 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9445 rtx compare_seq, compare_op;
9446 rtx second_test, bypass_test;
9447 enum machine_mode mode = GET_MODE (operands[0]);
9448 bool sign_bit_compare_p = false;;
9451 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9452 compare_seq = get_insns ();
9455 compare_code = GET_CODE (compare_op);
9457 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9458 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9459 sign_bit_compare_p = true;
9461 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9462 HImode insns, we'd be swallowed in word prefix ops. */
9464 if ((mode != HImode || TARGET_FAST_PREFIX)
9465 && (mode != DImode || TARGET_64BIT)
9466 && GET_CODE (operands[2]) == CONST_INT
9467 && GET_CODE (operands[3]) == CONST_INT)
9469 rtx out = operands[0];
9470 HOST_WIDE_INT ct = INTVAL (operands[2]);
9471 HOST_WIDE_INT cf = INTVAL (operands[3]);
9475 /* Sign bit compares are better done using shifts than we do by using
9477 if (sign_bit_compare_p
9478 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9479 ix86_compare_op1, &compare_op))
9481 /* Detect overlap between destination and compare sources. */
9484 if (!sign_bit_compare_p)
9488 compare_code = GET_CODE (compare_op);
9490 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9491 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9494 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9497 /* To simplify rest of code, restrict to the GEU case. */
9498 if (compare_code == LTU)
9500 HOST_WIDE_INT tmp = ct;
9503 compare_code = reverse_condition (compare_code);
9504 code = reverse_condition (code);
9509 PUT_CODE (compare_op,
9510 reverse_condition_maybe_unordered
9511 (GET_CODE (compare_op)));
9513 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9517 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9518 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9519 tmp = gen_reg_rtx (mode);
9522 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9524 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9528 if (code == GT || code == GE)
9529 code = reverse_condition (code);
9532 HOST_WIDE_INT tmp = ct;
9537 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9538 ix86_compare_op1, VOIDmode, 0, -1);
9551 tmp = expand_simple_binop (mode, PLUS,
9553 copy_rtx (tmp), 1, OPTAB_DIRECT);
9564 tmp = expand_simple_binop (mode, IOR,
9566 copy_rtx (tmp), 1, OPTAB_DIRECT);
9568 else if (diff == -1 && ct)
9578 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9580 tmp = expand_simple_binop (mode, PLUS,
9581 copy_rtx (tmp), GEN_INT (cf),
9582 copy_rtx (tmp), 1, OPTAB_DIRECT);
9590 * andl cf - ct, dest
9600 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9603 tmp = expand_simple_binop (mode, AND,
9605 gen_int_mode (cf - ct, mode),
9606 copy_rtx (tmp), 1, OPTAB_DIRECT);
9608 tmp = expand_simple_binop (mode, PLUS,
9609 copy_rtx (tmp), GEN_INT (ct),
9610 copy_rtx (tmp), 1, OPTAB_DIRECT);
9613 if (!rtx_equal_p (tmp, out))
9614 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9616 return 1; /* DONE */
9622 tmp = ct, ct = cf, cf = tmp;
9624 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9626 /* We may be reversing unordered compare to normal compare, that
9627 is not valid in general (we may convert non-trapping condition
9628 to trapping one), however on i386 we currently emit all
9629 comparisons unordered. */
9630 compare_code = reverse_condition_maybe_unordered (compare_code);
9631 code = reverse_condition_maybe_unordered (code);
9635 compare_code = reverse_condition (compare_code);
9636 code = reverse_condition (code);
9641 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9642 && GET_CODE (ix86_compare_op1) == CONST_INT)
9644 if (ix86_compare_op1 == const0_rtx
9645 && (code == LT || code == GE))
9646 compare_code = code;
9647 else if (ix86_compare_op1 == constm1_rtx)
9651 else if (code == GT)
9656 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9657 if (compare_code != NIL
9658 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9659 && (cf == -1 || ct == -1))
9661 /* If lea code below could be used, only optimize
9662 if it results in a 2 insn sequence. */
9664 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9665 || diff == 3 || diff == 5 || diff == 9)
9666 || (compare_code == LT && ct == -1)
9667 || (compare_code == GE && cf == -1))
9670 * notl op1 (if necessary)
9678 code = reverse_condition (code);
9681 out = emit_store_flag (out, code, ix86_compare_op0,
9682 ix86_compare_op1, VOIDmode, 0, -1);
9684 out = expand_simple_binop (mode, IOR,
9686 out, 1, OPTAB_DIRECT);
9687 if (out != operands[0])
9688 emit_move_insn (operands[0], out);
9690 return 1; /* DONE */
9695 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9696 || diff == 3 || diff == 5 || diff == 9)
9697 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9698 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9704 * lea cf(dest*(ct-cf)),dest
9708 * This also catches the degenerate setcc-only case.
9714 out = emit_store_flag (out, code, ix86_compare_op0,
9715 ix86_compare_op1, VOIDmode, 0, 1);
9718 /* On x86_64 the lea instruction operates on Pmode, so we need
9719 to get arithmetics done in proper mode to match. */
9721 tmp = copy_rtx (out);
9725 out1 = copy_rtx (out);
9726 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9730 tmp = gen_rtx_PLUS (mode, tmp, out1);
9736 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9739 if (!rtx_equal_p (tmp, out))
9742 out = force_operand (tmp, copy_rtx (out));
9744 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9746 if (!rtx_equal_p (out, operands[0]))
9747 emit_move_insn (operands[0], copy_rtx (out));
9749 return 1; /* DONE */
9753 * General case: Jumpful:
9754 * xorl dest,dest cmpl op1, op2
9755 * cmpl op1, op2 movl ct, dest
9757 * decl dest movl cf, dest
9758 * andl (cf-ct),dest 1:
9763 * This is reasonably steep, but branch mispredict costs are
9764 * high on modern cpus, so consider failing only if optimizing
9768 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9769 && BRANCH_COST >= 2)
9775 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9776 /* We may be reversing unordered compare to normal compare,
9777 that is not valid in general (we may convert non-trapping
9778 condition to trapping one), however on i386 we currently
9779 emit all comparisons unordered. */
9780 code = reverse_condition_maybe_unordered (code);
9783 code = reverse_condition (code);
9784 if (compare_code != NIL)
9785 compare_code = reverse_condition (compare_code);
9789 if (compare_code != NIL)
9791 /* notl op1 (if needed)
9796 For x < 0 (resp. x <= -1) there will be no notl,
9797 so if possible swap the constants to get rid of the
9799 True/false will be -1/0 while code below (store flag
9800 followed by decrement) is 0/-1, so the constants need
9801 to be exchanged once more. */
9803 if (compare_code == GE || !cf)
9805 code = reverse_condition (code);
9810 HOST_WIDE_INT tmp = cf;
9815 out = emit_store_flag (out, code, ix86_compare_op0,
9816 ix86_compare_op1, VOIDmode, 0, -1);
9820 out = emit_store_flag (out, code, ix86_compare_op0,
9821 ix86_compare_op1, VOIDmode, 0, 1);
9823 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9824 copy_rtx (out), 1, OPTAB_DIRECT);
9827 out = expand_simple_binop (mode, AND, copy_rtx (out),
9828 gen_int_mode (cf - ct, mode),
9829 copy_rtx (out), 1, OPTAB_DIRECT);
9831 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9832 copy_rtx (out), 1, OPTAB_DIRECT);
9833 if (!rtx_equal_p (out, operands[0]))
9834 emit_move_insn (operands[0], copy_rtx (out));
9836 return 1; /* DONE */
9840 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9842 /* Try a few things more with specific constants and a variable. */
9845 rtx var, orig_out, out, tmp;
9847 if (BRANCH_COST <= 2)
9848 return 0; /* FAIL */
9850 /* If one of the two operands is an interesting constant, load a
9851 constant with the above and mask it in with a logical operation. */
9853 if (GET_CODE (operands[2]) == CONST_INT)
9856 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9857 operands[3] = constm1_rtx, op = and_optab;
9858 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9859 operands[3] = const0_rtx, op = ior_optab;
9861 return 0; /* FAIL */
9863 else if (GET_CODE (operands[3]) == CONST_INT)
9866 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9867 operands[2] = constm1_rtx, op = and_optab;
9868 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9869 operands[2] = const0_rtx, op = ior_optab;
9871 return 0; /* FAIL */
9874 return 0; /* FAIL */
9876 orig_out = operands[0];
9877 tmp = gen_reg_rtx (mode);
9880 /* Recurse to get the constant loaded. */
9881 if (ix86_expand_int_movcc (operands) == 0)
9882 return 0; /* FAIL */
9884 /* Mask in the interesting variable. */
9885 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9887 if (!rtx_equal_p (out, orig_out))
9888 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9890 return 1; /* DONE */
9894 * For comparison with above,
9904 if (! nonimmediate_operand (operands[2], mode))
9905 operands[2] = force_reg (mode, operands[2]);
9906 if (! nonimmediate_operand (operands[3], mode))
9907 operands[3] = force_reg (mode, operands[3]);
9909 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9911 rtx tmp = gen_reg_rtx (mode);
9912 emit_move_insn (tmp, operands[3]);
9915 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9917 rtx tmp = gen_reg_rtx (mode);
9918 emit_move_insn (tmp, operands[2]);
9922 if (! register_operand (operands[2], VOIDmode)
9924 || ! register_operand (operands[3], VOIDmode)))
9925 operands[2] = force_reg (mode, operands[2]);
9928 && ! register_operand (operands[3], VOIDmode))
9929 operands[3] = force_reg (mode, operands[3]);
9931 emit_insn (compare_seq);
9932 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9933 gen_rtx_IF_THEN_ELSE (mode,
9934 compare_op, operands[2],
9937 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9938 gen_rtx_IF_THEN_ELSE (mode,
9940 copy_rtx (operands[3]),
9941 copy_rtx (operands[0]))));
9943 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9944 gen_rtx_IF_THEN_ELSE (mode,
9946 copy_rtx (operands[2]),
9947 copy_rtx (operands[0]))));
9949 return 1; /* DONE */
9953 ix86_expand_fp_movcc (operands)
9958 rtx compare_op, second_test, bypass_test;
9960 /* For SF/DFmode conditional moves based on comparisons
9961 in same mode, we may want to use SSE min/max instructions. */
9962 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9963 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9964 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9965 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9967 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9968 /* We may be called from the post-reload splitter. */
9969 && (!REG_P (operands[0])
9970 || SSE_REG_P (operands[0])
9971 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9973 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9974 code = GET_CODE (operands[1]);
9976 /* See if we have (cross) match between comparison operands and
9977 conditional move operands. */
9978 if (rtx_equal_p (operands[2], op1))
9983 code = reverse_condition_maybe_unordered (code);
9985 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9987 /* Check for min operation. */
9988 if (code == LT || code == UNLE)
9996 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9997 if (memory_operand (op0, VOIDmode))
9998 op0 = force_reg (GET_MODE (operands[0]), op0);
9999 if (GET_MODE (operands[0]) == SFmode)
10000 emit_insn (gen_minsf3 (operands[0], op0, op1));
10002 emit_insn (gen_mindf3 (operands[0], op0, op1));
10005 /* Check for max operation. */
10006 if (code == GT || code == UNGE)
10014 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10015 if (memory_operand (op0, VOIDmode))
10016 op0 = force_reg (GET_MODE (operands[0]), op0);
10017 if (GET_MODE (operands[0]) == SFmode)
10018 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10020 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10024 /* Manage condition to be sse_comparison_operator. In case we are
10025 in non-ieee mode, try to canonicalize the destination operand
10026 to be first in the comparison - this helps reload to avoid extra
10028 if (!sse_comparison_operator (operands[1], VOIDmode)
10029 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10031 rtx tmp = ix86_compare_op0;
10032 ix86_compare_op0 = ix86_compare_op1;
10033 ix86_compare_op1 = tmp;
10034 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10035 VOIDmode, ix86_compare_op0,
10038 /* Similarly try to manage result to be first operand of conditional
10039 move. We also don't support the NE comparison on SSE, so try to
10041 if ((rtx_equal_p (operands[0], operands[3])
10042 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10043 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10045 rtx tmp = operands[2];
10046 operands[2] = operands[3];
10048 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10049 (GET_CODE (operands[1])),
10050 VOIDmode, ix86_compare_op0,
10053 if (GET_MODE (operands[0]) == SFmode)
10054 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10055 operands[2], operands[3],
10056 ix86_compare_op0, ix86_compare_op1));
10058 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10059 operands[2], operands[3],
10060 ix86_compare_op0, ix86_compare_op1));
10064 /* The floating point conditional move instructions don't directly
10065 support conditions resulting from a signed integer comparison. */
10067 code = GET_CODE (operands[1]);
10068 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10070 /* The floating point conditional move instructions don't directly
10071 support signed integer comparisons. */
10073 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10075 if (second_test != NULL || bypass_test != NULL)
10077 tmp = gen_reg_rtx (QImode);
10078 ix86_expand_setcc (code, tmp);
10080 ix86_compare_op0 = tmp;
10081 ix86_compare_op1 = const0_rtx;
10082 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10084 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10086 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10087 emit_move_insn (tmp, operands[3]);
10090 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10092 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10093 emit_move_insn (tmp, operands[2]);
10097 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10098 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10103 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10104 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10109 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10110 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10118 /* Expand conditional increment or decrement using adb/sbb instructions.
10119 The default case using setcc followed by the conditional move can be
10120 done by generic code. */
10122 ix86_expand_int_addcc (operands)
10125 enum rtx_code code = GET_CODE (operands[1]);
10127 rtx val = const0_rtx;
10128 bool fpcmp = false;
10129 enum machine_mode mode = GET_MODE (operands[0]);
10131 if (operands[3] != const1_rtx
10132 && operands[3] != constm1_rtx)
10134 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10135 ix86_compare_op1, &compare_op))
10137 code = GET_CODE (compare_op);
10139 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10140 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10143 code = ix86_fp_compare_code_to_integer (code);
10150 PUT_CODE (compare_op,
10151 reverse_condition_maybe_unordered
10152 (GET_CODE (compare_op)));
10154 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10156 PUT_MODE (compare_op, mode);
10158 /* Construct either adc or sbb insn. */
10159 if ((code == LTU) == (operands[3] == constm1_rtx))
10161 switch (GET_MODE (operands[0]))
10164 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10167 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10170 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10173 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10181 switch (GET_MODE (operands[0]))
10184 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10187 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10190 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10193 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10199 return 1; /* DONE */
10203 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10204 works for floating pointer parameters and nonoffsetable memories.
10205 For pushes, it returns just stack offsets; the values will be saved
10206 in the right order. Maximally three parts are generated. */
10209 ix86_split_to_parts (operand, parts, mode)
10212 enum machine_mode mode;
10217 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10219 size = (GET_MODE_SIZE (mode) + 4) / 8;
10221 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10223 if (size < 2 || size > 3)
10226 /* Optimize constant pool reference to immediates. This is used by fp
10227 moves, that force all constants to memory to allow combining. */
10228 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10230 rtx tmp = maybe_get_pool_constant (operand);
10235 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10237 /* The only non-offsetable memories we handle are pushes. */
10238 if (! push_operand (operand, VOIDmode))
10241 operand = copy_rtx (operand);
10242 PUT_MODE (operand, Pmode);
10243 parts[0] = parts[1] = parts[2] = operand;
10245 else if (!TARGET_64BIT)
10247 if (mode == DImode)
10248 split_di (&operand, 1, &parts[0], &parts[1]);
10251 if (REG_P (operand))
10253 if (!reload_completed)
10255 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10256 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10258 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10260 else if (offsettable_memref_p (operand))
10262 operand = adjust_address (operand, SImode, 0);
10263 parts[0] = operand;
10264 parts[1] = adjust_address (operand, SImode, 4);
10266 parts[2] = adjust_address (operand, SImode, 8);
10268 else if (GET_CODE (operand) == CONST_DOUBLE)
10273 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10278 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10279 parts[2] = gen_int_mode (l[2], SImode);
10282 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10287 parts[1] = gen_int_mode (l[1], SImode);
10288 parts[0] = gen_int_mode (l[0], SImode);
10296 if (mode == TImode)
10297 split_ti (&operand, 1, &parts[0], &parts[1]);
10298 if (mode == XFmode || mode == TFmode)
10300 if (REG_P (operand))
10302 if (!reload_completed)
10304 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10305 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10307 else if (offsettable_memref_p (operand))
10309 operand = adjust_address (operand, DImode, 0);
10310 parts[0] = operand;
10311 parts[1] = adjust_address (operand, SImode, 8);
10313 else if (GET_CODE (operand) == CONST_DOUBLE)
10318 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10319 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10320 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10321 if (HOST_BITS_PER_WIDE_INT >= 64)
10324 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10325 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10328 parts[0] = immed_double_const (l[0], l[1], DImode);
10329 parts[1] = gen_int_mode (l[2], SImode);
10339 /* Emit insns to perform a move or push of DI, DF, and XF values.
10340 Return false when normal moves are needed; true when all required
10341 insns have been emitted. Operands 2-4 contain the input values
10342 int the correct order; operands 5-7 contain the output values. */
10345 ix86_split_long_move (operands)
10351 int collisions = 0;
10352 enum machine_mode mode = GET_MODE (operands[0]);
10354 /* The DFmode expanders may ask us to move double.
10355 For 64bit target this is single move. By hiding the fact
10356 here we simplify i386.md splitters. */
10357 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10359 /* Optimize constant pool reference to immediates. This is used by
10360 fp moves, that force all constants to memory to allow combining. */
10362 if (GET_CODE (operands[1]) == MEM
10363 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10364 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10365 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10366 if (push_operand (operands[0], VOIDmode))
10368 operands[0] = copy_rtx (operands[0]);
10369 PUT_MODE (operands[0], Pmode);
10372 operands[0] = gen_lowpart (DImode, operands[0]);
10373 operands[1] = gen_lowpart (DImode, operands[1]);
10374 emit_move_insn (operands[0], operands[1]);
10378 /* The only non-offsettable memory we handle is push. */
10379 if (push_operand (operands[0], VOIDmode))
10381 else if (GET_CODE (operands[0]) == MEM
10382 && ! offsettable_memref_p (operands[0]))
10385 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10386 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10388 /* When emitting push, take care for source operands on the stack. */
10389 if (push && GET_CODE (operands[1]) == MEM
10390 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10393 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10394 XEXP (part[1][2], 0));
10395 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10396 XEXP (part[1][1], 0));
10399 /* We need to do copy in the right order in case an address register
10400 of the source overlaps the destination. */
10401 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10403 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10405 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10408 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10411 /* Collision in the middle part can be handled by reordering. */
10412 if (collisions == 1 && nparts == 3
10413 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10416 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10417 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10420 /* If there are more collisions, we can't handle it by reordering.
10421 Do an lea to the last part and use only one colliding move. */
10422 else if (collisions > 1)
10425 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10426 XEXP (part[1][0], 0)));
10427 part[1][0] = change_address (part[1][0],
10428 TARGET_64BIT ? DImode : SImode,
10429 part[0][nparts - 1]);
10430 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10432 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10442 /* We use only first 12 bytes of TFmode value, but for pushing we
10443 are required to adjust stack as if we were pushing real 16byte
10445 if (mode == TFmode && !TARGET_64BIT)
10446 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10448 emit_move_insn (part[0][2], part[1][2]);
10453 /* In 64bit mode we don't have 32bit push available. In case this is
10454 register, it is OK - we will just use larger counterpart. We also
10455 retype memory - these comes from attempt to avoid REX prefix on
10456 moving of second half of TFmode value. */
10457 if (GET_MODE (part[1][1]) == SImode)
10459 if (GET_CODE (part[1][1]) == MEM)
10460 part[1][1] = adjust_address (part[1][1], DImode, 0);
10461 else if (REG_P (part[1][1]))
10462 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10465 if (GET_MODE (part[1][0]) == SImode)
10466 part[1][0] = part[1][1];
10469 emit_move_insn (part[0][1], part[1][1]);
10470 emit_move_insn (part[0][0], part[1][0]);
10474 /* Choose correct order to not overwrite the source before it is copied. */
10475 if ((REG_P (part[0][0])
10476 && REG_P (part[1][1])
10477 && (REGNO (part[0][0]) == REGNO (part[1][1])
10479 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10481 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10485 operands[2] = part[0][2];
10486 operands[3] = part[0][1];
10487 operands[4] = part[0][0];
10488 operands[5] = part[1][2];
10489 operands[6] = part[1][1];
10490 operands[7] = part[1][0];
10494 operands[2] = part[0][1];
10495 operands[3] = part[0][0];
10496 operands[5] = part[1][1];
10497 operands[6] = part[1][0];
10504 operands[2] = part[0][0];
10505 operands[3] = part[0][1];
10506 operands[4] = part[0][2];
10507 operands[5] = part[1][0];
10508 operands[6] = part[1][1];
10509 operands[7] = part[1][2];
10513 operands[2] = part[0][0];
10514 operands[3] = part[0][1];
10515 operands[5] = part[1][0];
10516 operands[6] = part[1][1];
10519 emit_move_insn (operands[2], operands[5]);
10520 emit_move_insn (operands[3], operands[6]);
10522 emit_move_insn (operands[4], operands[7]);
10528 ix86_split_ashldi (operands, scratch)
10529 rtx *operands, scratch;
10531 rtx low[2], high[2];
10534 if (GET_CODE (operands[2]) == CONST_INT)
10536 split_di (operands, 2, low, high);
10537 count = INTVAL (operands[2]) & 63;
10541 emit_move_insn (high[0], low[1]);
10542 emit_move_insn (low[0], const0_rtx);
10545 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10549 if (!rtx_equal_p (operands[0], operands[1]))
10550 emit_move_insn (operands[0], operands[1]);
10551 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10552 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10557 if (!rtx_equal_p (operands[0], operands[1]))
10558 emit_move_insn (operands[0], operands[1]);
10560 split_di (operands, 1, low, high);
10562 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10563 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10565 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10567 if (! no_new_pseudos)
10568 scratch = force_reg (SImode, const0_rtx);
10570 emit_move_insn (scratch, const0_rtx);
10572 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10576 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10581 ix86_split_ashrdi (operands, scratch)
10582 rtx *operands, scratch;
10584 rtx low[2], high[2];
10587 if (GET_CODE (operands[2]) == CONST_INT)
10589 split_di (operands, 2, low, high);
10590 count = INTVAL (operands[2]) & 63;
10594 emit_move_insn (low[0], high[1]);
10596 if (! reload_completed)
10597 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10600 emit_move_insn (high[0], low[0]);
10601 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10605 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10609 if (!rtx_equal_p (operands[0], operands[1]))
10610 emit_move_insn (operands[0], operands[1]);
10611 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10612 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10617 if (!rtx_equal_p (operands[0], operands[1]))
10618 emit_move_insn (operands[0], operands[1]);
10620 split_di (operands, 1, low, high);
10622 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10623 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10625 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10627 if (! no_new_pseudos)
10628 scratch = gen_reg_rtx (SImode);
10629 emit_move_insn (scratch, high[0]);
10630 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10631 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10635 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10640 ix86_split_lshrdi (operands, scratch)
10641 rtx *operands, scratch;
10643 rtx low[2], high[2];
10646 if (GET_CODE (operands[2]) == CONST_INT)
10648 split_di (operands, 2, low, high);
10649 count = INTVAL (operands[2]) & 63;
10653 emit_move_insn (low[0], high[1]);
10654 emit_move_insn (high[0], const0_rtx);
10657 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10661 if (!rtx_equal_p (operands[0], operands[1]))
10662 emit_move_insn (operands[0], operands[1]);
10663 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10664 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10669 if (!rtx_equal_p (operands[0], operands[1]))
10670 emit_move_insn (operands[0], operands[1]);
10672 split_di (operands, 1, low, high);
10674 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10675 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10677 /* Heh. By reversing the arguments, we can reuse this pattern. */
10678 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10680 if (! no_new_pseudos)
10681 scratch = force_reg (SImode, const0_rtx);
10683 emit_move_insn (scratch, const0_rtx);
10685 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10689 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10693 /* Helper function for the string operations below. Dest VARIABLE whether
10694 it is aligned to VALUE bytes. If true, jump to the label. */
10696 ix86_expand_aligntest (variable, value)
10700 rtx label = gen_label_rtx ();
10701 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10702 if (GET_MODE (variable) == DImode)
10703 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10705 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10706 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10711 /* Adjust COUNTER by the VALUE. */
10713 ix86_adjust_counter (countreg, value)
10715 HOST_WIDE_INT value;
10717 if (GET_MODE (countreg) == DImode)
10718 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10720 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10723 /* Zero extend possibly SImode EXP to Pmode register. */
10725 ix86_zero_extend_to_Pmode (exp)
10729 if (GET_MODE (exp) == VOIDmode)
10730 return force_reg (Pmode, exp);
10731 if (GET_MODE (exp) == Pmode)
10732 return copy_to_mode_reg (Pmode, exp);
10733 r = gen_reg_rtx (Pmode);
10734 emit_insn (gen_zero_extendsidi2 (r, exp));
10738 /* Expand string move (memcpy) operation. Use i386 string operations when
10739 profitable. expand_clrstr contains similar code. */
10741 ix86_expand_movstr (dst, src, count_exp, align_exp)
10742 rtx dst, src, count_exp, align_exp;
10744 rtx srcreg, destreg, countreg;
10745 enum machine_mode counter_mode;
10746 HOST_WIDE_INT align = 0;
10747 unsigned HOST_WIDE_INT count = 0;
10751 if (GET_CODE (align_exp) == CONST_INT)
10752 align = INTVAL (align_exp);
10754 /* This simple hack avoids all inlining code and simplifies code below. */
10755 if (!TARGET_ALIGN_STRINGOPS)
10758 if (GET_CODE (count_exp) == CONST_INT)
10760 count = INTVAL (count_exp);
10761 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10765 /* Figure out proper mode for counter. For 32bits it is always SImode,
10766 for 64bits use SImode when possible, otherwise DImode.
10767 Set count to number of bytes copied when known at compile time. */
10768 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10769 || x86_64_zero_extended_value (count_exp))
10770 counter_mode = SImode;
10772 counter_mode = DImode;
10776 if (counter_mode != SImode && counter_mode != DImode)
10779 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10780 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10782 emit_insn (gen_cld ());
10784 /* When optimizing for size emit simple rep ; movsb instruction for
10785 counts not divisible by 4. */
10787 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10789 countreg = ix86_zero_extend_to_Pmode (count_exp);
10791 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10792 destreg, srcreg, countreg));
10794 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10795 destreg, srcreg, countreg));
10798 /* For constant aligned (or small unaligned) copies use rep movsl
10799 followed by code copying the rest. For PentiumPro ensure 8 byte
10800 alignment to allow rep movsl acceleration. */
10802 else if (count != 0
10804 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10805 || optimize_size || count < (unsigned int) 64))
10807 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10808 if (count & ~(size - 1))
10810 countreg = copy_to_mode_reg (counter_mode,
10811 GEN_INT ((count >> (size == 4 ? 2 : 3))
10812 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10813 countreg = ix86_zero_extend_to_Pmode (countreg);
10817 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10818 destreg, srcreg, countreg));
10820 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10821 destreg, srcreg, countreg));
10824 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10825 destreg, srcreg, countreg));
10827 if (size == 8 && (count & 0x04))
10828 emit_insn (gen_strmovsi (destreg, srcreg));
10830 emit_insn (gen_strmovhi (destreg, srcreg));
10832 emit_insn (gen_strmovqi (destreg, srcreg));
10834 /* The generic code based on the glibc implementation:
10835 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10836 allowing accelerated copying there)
10837 - copy the data using rep movsl
10838 - copy the rest. */
10843 int desired_alignment = (TARGET_PENTIUMPRO
10844 && (count == 0 || count >= (unsigned int) 260)
10845 ? 8 : UNITS_PER_WORD);
10847 /* In case we don't know anything about the alignment, default to
10848 library version, since it is usually equally fast and result in
10851 Also emit call when we know that the count is large and call overhead
10852 will not be important. */
10853 if (!TARGET_INLINE_ALL_STRINGOPS
10854 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10860 if (TARGET_SINGLE_STRINGOP)
10861 emit_insn (gen_cld ());
10863 countreg2 = gen_reg_rtx (Pmode);
10864 countreg = copy_to_mode_reg (counter_mode, count_exp);
10866 /* We don't use loops to align destination and to copy parts smaller
10867 than 4 bytes, because gcc is able to optimize such code better (in
10868 the case the destination or the count really is aligned, gcc is often
10869 able to predict the branches) and also it is friendlier to the
10870 hardware branch prediction.
10872 Using loops is beneficial for generic case, because we can
10873 handle small counts using the loops. Many CPUs (such as Athlon)
10874 have large REP prefix setup costs.
10876 This is quite costly. Maybe we can revisit this decision later or
10877 add some customizability to this code. */
10879 if (count == 0 && align < desired_alignment)
10881 label = gen_label_rtx ();
10882 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10883 LEU, 0, counter_mode, 1, label);
10887 rtx label = ix86_expand_aligntest (destreg, 1);
10888 emit_insn (gen_strmovqi (destreg, srcreg));
10889 ix86_adjust_counter (countreg, 1);
10890 emit_label (label);
10891 LABEL_NUSES (label) = 1;
10895 rtx label = ix86_expand_aligntest (destreg, 2);
10896 emit_insn (gen_strmovhi (destreg, srcreg));
10897 ix86_adjust_counter (countreg, 2);
10898 emit_label (label);
10899 LABEL_NUSES (label) = 1;
10901 if (align <= 4 && desired_alignment > 4)
10903 rtx label = ix86_expand_aligntest (destreg, 4);
10904 emit_insn (gen_strmovsi (destreg, srcreg));
10905 ix86_adjust_counter (countreg, 4);
10906 emit_label (label);
10907 LABEL_NUSES (label) = 1;
10910 if (label && desired_alignment > 4 && !TARGET_64BIT)
10912 emit_label (label);
10913 LABEL_NUSES (label) = 1;
10916 if (!TARGET_SINGLE_STRINGOP)
10917 emit_insn (gen_cld ());
10920 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10922 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10923 destreg, srcreg, countreg2));
10927 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10928 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10929 destreg, srcreg, countreg2));
10934 emit_label (label);
10935 LABEL_NUSES (label) = 1;
10937 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10938 emit_insn (gen_strmovsi (destreg, srcreg));
10939 if ((align <= 4 || count == 0) && TARGET_64BIT)
10941 rtx label = ix86_expand_aligntest (countreg, 4);
10942 emit_insn (gen_strmovsi (destreg, srcreg));
10943 emit_label (label);
10944 LABEL_NUSES (label) = 1;
10946 if (align > 2 && count != 0 && (count & 2))
10947 emit_insn (gen_strmovhi (destreg, srcreg));
10948 if (align <= 2 || count == 0)
10950 rtx label = ix86_expand_aligntest (countreg, 2);
10951 emit_insn (gen_strmovhi (destreg, srcreg));
10952 emit_label (label);
10953 LABEL_NUSES (label) = 1;
10955 if (align > 1 && count != 0 && (count & 1))
10956 emit_insn (gen_strmovqi (destreg, srcreg));
10957 if (align <= 1 || count == 0)
10959 rtx label = ix86_expand_aligntest (countreg, 1);
10960 emit_insn (gen_strmovqi (destreg, srcreg));
10961 emit_label (label);
10962 LABEL_NUSES (label) = 1;
10966 insns = get_insns ();
10969 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10974 /* Expand string clear operation (bzero). Use i386 string operations when
10975 profitable. expand_movstr contains similar code. */
10977 ix86_expand_clrstr (src, count_exp, align_exp)
10978 rtx src, count_exp, align_exp;
10980 rtx destreg, zeroreg, countreg;
10981 enum machine_mode counter_mode;
10982 HOST_WIDE_INT align = 0;
10983 unsigned HOST_WIDE_INT count = 0;
10985 if (GET_CODE (align_exp) == CONST_INT)
10986 align = INTVAL (align_exp);
10988 /* This simple hack avoids all inlining code and simplifies code below. */
10989 if (!TARGET_ALIGN_STRINGOPS)
10992 if (GET_CODE (count_exp) == CONST_INT)
10994 count = INTVAL (count_exp);
10995 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10998 /* Figure out proper mode for counter. For 32bits it is always SImode,
10999 for 64bits use SImode when possible, otherwise DImode.
11000 Set count to number of bytes copied when known at compile time. */
11001 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11002 || x86_64_zero_extended_value (count_exp))
11003 counter_mode = SImode;
11005 counter_mode = DImode;
11007 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11009 emit_insn (gen_cld ());
11011 /* When optimizing for size emit simple rep ; movsb instruction for
11012 counts not divisible by 4. */
11014 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11016 countreg = ix86_zero_extend_to_Pmode (count_exp);
11017 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11019 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11020 destreg, countreg));
11022 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11023 destreg, countreg));
11025 else if (count != 0
11027 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11028 || optimize_size || count < (unsigned int) 64))
11030 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11031 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11032 if (count & ~(size - 1))
11034 countreg = copy_to_mode_reg (counter_mode,
11035 GEN_INT ((count >> (size == 4 ? 2 : 3))
11036 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11037 countreg = ix86_zero_extend_to_Pmode (countreg);
11041 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11042 destreg, countreg));
11044 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11045 destreg, countreg));
11048 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11049 destreg, countreg));
11051 if (size == 8 && (count & 0x04))
11052 emit_insn (gen_strsetsi (destreg,
11053 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11055 emit_insn (gen_strsethi (destreg,
11056 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11058 emit_insn (gen_strsetqi (destreg,
11059 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11065 /* Compute desired alignment of the string operation. */
11066 int desired_alignment = (TARGET_PENTIUMPRO
11067 && (count == 0 || count >= (unsigned int) 260)
11068 ? 8 : UNITS_PER_WORD);
11070 /* In case we don't know anything about the alignment, default to
11071 library version, since it is usually equally fast and result in
11074 Also emit call when we know that the count is large and call overhead
11075 will not be important. */
11076 if (!TARGET_INLINE_ALL_STRINGOPS
11077 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11080 if (TARGET_SINGLE_STRINGOP)
11081 emit_insn (gen_cld ());
11083 countreg2 = gen_reg_rtx (Pmode);
11084 countreg = copy_to_mode_reg (counter_mode, count_exp);
11085 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11087 if (count == 0 && align < desired_alignment)
11089 label = gen_label_rtx ();
11090 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11091 LEU, 0, counter_mode, 1, label);
11095 rtx label = ix86_expand_aligntest (destreg, 1);
11096 emit_insn (gen_strsetqi (destreg,
11097 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11098 ix86_adjust_counter (countreg, 1);
11099 emit_label (label);
11100 LABEL_NUSES (label) = 1;
11104 rtx label = ix86_expand_aligntest (destreg, 2);
11105 emit_insn (gen_strsethi (destreg,
11106 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11107 ix86_adjust_counter (countreg, 2);
11108 emit_label (label);
11109 LABEL_NUSES (label) = 1;
11111 if (align <= 4 && desired_alignment > 4)
11113 rtx label = ix86_expand_aligntest (destreg, 4);
11114 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11115 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11117 ix86_adjust_counter (countreg, 4);
11118 emit_label (label);
11119 LABEL_NUSES (label) = 1;
11122 if (label && desired_alignment > 4 && !TARGET_64BIT)
11124 emit_label (label);
11125 LABEL_NUSES (label) = 1;
11129 if (!TARGET_SINGLE_STRINGOP)
11130 emit_insn (gen_cld ());
11133 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11135 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11136 destreg, countreg2));
11140 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11141 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11142 destreg, countreg2));
11146 emit_label (label);
11147 LABEL_NUSES (label) = 1;
11150 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11151 emit_insn (gen_strsetsi (destreg,
11152 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11153 if (TARGET_64BIT && (align <= 4 || count == 0))
11155 rtx label = ix86_expand_aligntest (countreg, 4);
11156 emit_insn (gen_strsetsi (destreg,
11157 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11158 emit_label (label);
11159 LABEL_NUSES (label) = 1;
11161 if (align > 2 && count != 0 && (count & 2))
11162 emit_insn (gen_strsethi (destreg,
11163 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11164 if (align <= 2 || count == 0)
11166 rtx label = ix86_expand_aligntest (countreg, 2);
11167 emit_insn (gen_strsethi (destreg,
11168 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11169 emit_label (label);
11170 LABEL_NUSES (label) = 1;
11172 if (align > 1 && count != 0 && (count & 1))
11173 emit_insn (gen_strsetqi (destreg,
11174 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11175 if (align <= 1 || count == 0)
11177 rtx label = ix86_expand_aligntest (countreg, 1);
11178 emit_insn (gen_strsetqi (destreg,
11179 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11180 emit_label (label);
11181 LABEL_NUSES (label) = 1;
11186 /* Expand strlen. */
11188 ix86_expand_strlen (out, src, eoschar, align)
11189 rtx out, src, eoschar, align;
11191 rtx addr, scratch1, scratch2, scratch3, scratch4;
11193 /* The generic case of strlen expander is long. Avoid it's
11194 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11196 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11197 && !TARGET_INLINE_ALL_STRINGOPS
11199 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11202 addr = force_reg (Pmode, XEXP (src, 0));
11203 scratch1 = gen_reg_rtx (Pmode);
11205 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11208 /* Well it seems that some optimizer does not combine a call like
11209 foo(strlen(bar), strlen(bar));
11210 when the move and the subtraction is done here. It does calculate
11211 the length just once when these instructions are done inside of
11212 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11213 often used and I use one fewer register for the lifetime of
11214 output_strlen_unroll() this is better. */
11216 emit_move_insn (out, addr);
11218 ix86_expand_strlensi_unroll_1 (out, align);
11220 /* strlensi_unroll_1 returns the address of the zero at the end of
11221 the string, like memchr(), so compute the length by subtracting
11222 the start address. */
11224 emit_insn (gen_subdi3 (out, out, addr));
11226 emit_insn (gen_subsi3 (out, out, addr));
11230 scratch2 = gen_reg_rtx (Pmode);
11231 scratch3 = gen_reg_rtx (Pmode);
11232 scratch4 = force_reg (Pmode, constm1_rtx);
11234 emit_move_insn (scratch3, addr);
11235 eoschar = force_reg (QImode, eoschar);
11237 emit_insn (gen_cld ());
11240 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11241 align, scratch4, scratch3));
11242 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11243 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11247 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11248 align, scratch4, scratch3));
11249 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11250 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11256 /* Expand the appropriate insns for doing strlen if not just doing
11259 out = result, initialized with the start address
11260 align_rtx = alignment of the address.
11261 scratch = scratch register, initialized with the startaddress when
11262 not aligned, otherwise undefined
11264 This is just the body. It needs the initialisations mentioned above and
11265 some address computing at the end. These things are done in i386.md. */
11268 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11269 rtx out, align_rtx;
11273 rtx align_2_label = NULL_RTX;
11274 rtx align_3_label = NULL_RTX;
11275 rtx align_4_label = gen_label_rtx ();
11276 rtx end_0_label = gen_label_rtx ();
11278 rtx tmpreg = gen_reg_rtx (SImode);
11279 rtx scratch = gen_reg_rtx (SImode);
11283 if (GET_CODE (align_rtx) == CONST_INT)
11284 align = INTVAL (align_rtx);
11286 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11288 /* Is there a known alignment and is it less than 4? */
11291 rtx scratch1 = gen_reg_rtx (Pmode);
11292 emit_move_insn (scratch1, out);
11293 /* Is there a known alignment and is it not 2? */
11296 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11297 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11299 /* Leave just the 3 lower bits. */
11300 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11301 NULL_RTX, 0, OPTAB_WIDEN);
11303 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11304 Pmode, 1, align_4_label);
11305 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11306 Pmode, 1, align_2_label);
11307 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11308 Pmode, 1, align_3_label);
11312 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11313 check if is aligned to 4 - byte. */
11315 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11316 NULL_RTX, 0, OPTAB_WIDEN);
11318 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11319 Pmode, 1, align_4_label);
11322 mem = gen_rtx_MEM (QImode, out);
11324 /* Now compare the bytes. */
11326 /* Compare the first n unaligned byte on a byte per byte basis. */
11327 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11328 QImode, 1, end_0_label);
11330 /* Increment the address. */
11332 emit_insn (gen_adddi3 (out, out, const1_rtx));
11334 emit_insn (gen_addsi3 (out, out, const1_rtx));
11336 /* Not needed with an alignment of 2 */
11339 emit_label (align_2_label);
11341 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11345 emit_insn (gen_adddi3 (out, out, const1_rtx));
11347 emit_insn (gen_addsi3 (out, out, const1_rtx));
11349 emit_label (align_3_label);
11352 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11356 emit_insn (gen_adddi3 (out, out, const1_rtx));
11358 emit_insn (gen_addsi3 (out, out, const1_rtx));
11361 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11362 align this loop. It gives only huge programs, but does not help to
11364 emit_label (align_4_label);
11366 mem = gen_rtx_MEM (SImode, out);
11367 emit_move_insn (scratch, mem);
11369 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11371 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11373 /* This formula yields a nonzero result iff one of the bytes is zero.
11374 This saves three branches inside loop and many cycles. */
11376 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11377 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11378 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11379 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11380 gen_int_mode (0x80808080, SImode)));
11381 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11386 rtx reg = gen_reg_rtx (SImode);
11387 rtx reg2 = gen_reg_rtx (Pmode);
11388 emit_move_insn (reg, tmpreg);
11389 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11391 /* If zero is not in the first two bytes, move two bytes forward. */
11392 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11393 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11394 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11395 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11396 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11399 /* Emit lea manually to avoid clobbering of flags. */
11400 emit_insn (gen_rtx_SET (SImode, reg2,
11401 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11403 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11404 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11405 emit_insn (gen_rtx_SET (VOIDmode, out,
11406 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11413 rtx end_2_label = gen_label_rtx ();
11414 /* Is zero in the first two bytes? */
11416 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11417 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11418 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11419 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11420 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11422 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11423 JUMP_LABEL (tmp) = end_2_label;
11425 /* Not in the first two. Move two bytes forward. */
11426 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11428 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11430 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11432 emit_label (end_2_label);
11436 /* Avoid branch in fixing the byte. */
11437 tmpreg = gen_lowpart (QImode, tmpreg);
11438 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11439 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11441 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11443 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11445 emit_label (end_0_label);
11449 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11450 rtx retval, fnaddr, callarg1, callarg2, pop;
11453 rtx use = NULL, call;
11455 if (pop == const0_rtx)
11457 if (TARGET_64BIT && pop)
11461 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11462 fnaddr = machopic_indirect_call_target (fnaddr);
11464 /* Static functions and indirect calls don't need the pic register. */
11465 if (! TARGET_64BIT && flag_pic
11466 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11467 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11468 use_reg (&use, pic_offset_table_rtx);
11470 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11472 rtx al = gen_rtx_REG (QImode, 0);
11473 emit_move_insn (al, callarg2);
11474 use_reg (&use, al);
11476 #endif /* TARGET_MACHO */
11478 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11480 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11481 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11483 if (sibcall && TARGET_64BIT
11484 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11487 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11488 fnaddr = gen_rtx_REG (Pmode, 40);
11489 emit_move_insn (fnaddr, addr);
11490 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11493 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11495 call = gen_rtx_SET (VOIDmode, retval, call);
11498 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11499 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11500 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11503 call = emit_call_insn (call);
11505 CALL_INSN_FUNCTION_USAGE (call) = use;
11509 /* Clear stack slot assignments remembered from previous functions.
11510 This is called from INIT_EXPANDERS once before RTL is emitted for each
11513 static struct machine_function *
11514 ix86_init_machine_status ()
11516 return ggc_alloc_cleared (sizeof (struct machine_function));
11519 /* Return a MEM corresponding to a stack slot with mode MODE.
11520 Allocate a new slot if necessary.
11522 The RTL for a function can have several slots available: N is
11523 which slot to use. */
11526 assign_386_stack_local (mode, n)
11527 enum machine_mode mode;
11530 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11533 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11534 ix86_stack_locals[(int) mode][n]
11535 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11537 return ix86_stack_locals[(int) mode][n];
11540 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11542 static GTY(()) rtx ix86_tls_symbol;
11544 ix86_tls_get_addr ()
11547 if (!ix86_tls_symbol)
11549 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11550 (TARGET_GNU_TLS && !TARGET_64BIT)
11551 ? "___tls_get_addr"
11552 : "__tls_get_addr");
11555 return ix86_tls_symbol;
11558 /* Calculate the length of the memory address in the instruction
11559 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11562 memory_address_length (addr)
11565 struct ix86_address parts;
11566 rtx base, index, disp;
11569 if (GET_CODE (addr) == PRE_DEC
11570 || GET_CODE (addr) == POST_INC
11571 || GET_CODE (addr) == PRE_MODIFY
11572 || GET_CODE (addr) == POST_MODIFY)
11575 if (! ix86_decompose_address (addr, &parts))
11579 index = parts.index;
11583 /* Register Indirect. */
11584 if (base && !index && !disp)
11586 /* Special cases: ebp and esp need the two-byte modrm form. */
11587 if (addr == stack_pointer_rtx
11588 || addr == arg_pointer_rtx
11589 || addr == frame_pointer_rtx
11590 || addr == hard_frame_pointer_rtx)
11594 /* Direct Addressing. */
11595 else if (disp && !base && !index)
11600 /* Find the length of the displacement constant. */
11603 if (GET_CODE (disp) == CONST_INT
11604 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11610 /* An index requires the two-byte modrm form. */
11618 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11619 is set, expect that insn have 8bit immediate alternative. */
11621 ix86_attr_length_immediate_default (insn, shortform)
11627 extract_insn_cached (insn);
11628 for (i = recog_data.n_operands - 1; i >= 0; --i)
11629 if (CONSTANT_P (recog_data.operand[i]))
11634 && GET_CODE (recog_data.operand[i]) == CONST_INT
11635 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11639 switch (get_attr_mode (insn))
11650 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11655 fatal_insn ("unknown insn mode", insn);
11661 /* Compute default value for "length_address" attribute. */
11663 ix86_attr_length_address_default (insn)
11667 extract_insn_cached (insn);
11668 for (i = recog_data.n_operands - 1; i >= 0; --i)
11669 if (GET_CODE (recog_data.operand[i]) == MEM)
11671 return memory_address_length (XEXP (recog_data.operand[i], 0));
11677 /* Return the maximum number of instructions a cpu can issue. */
11684 case PROCESSOR_PENTIUM:
11688 case PROCESSOR_PENTIUMPRO:
11689 case PROCESSOR_PENTIUM4:
11690 case PROCESSOR_ATHLON:
11699 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11700 by DEP_INSN and nothing set by DEP_INSN. */
11703 ix86_flags_dependant (insn, dep_insn, insn_type)
11704 rtx insn, dep_insn;
11705 enum attr_type insn_type;
11709 /* Simplify the test for uninteresting insns. */
11710 if (insn_type != TYPE_SETCC
11711 && insn_type != TYPE_ICMOV
11712 && insn_type != TYPE_FCMOV
11713 && insn_type != TYPE_IBR)
11716 if ((set = single_set (dep_insn)) != 0)
11718 set = SET_DEST (set);
11721 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11722 && XVECLEN (PATTERN (dep_insn), 0) == 2
11723 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11724 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11726 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11727 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11732 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11735 /* This test is true if the dependent insn reads the flags but
11736 not any other potentially set register. */
11737 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11740 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11746 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11747 address with operands set by DEP_INSN. */
11750 ix86_agi_dependant (insn, dep_insn, insn_type)
11751 rtx insn, dep_insn;
11752 enum attr_type insn_type;
11756 if (insn_type == TYPE_LEA
11759 addr = PATTERN (insn);
11760 if (GET_CODE (addr) == SET)
11762 else if (GET_CODE (addr) == PARALLEL
11763 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11764 addr = XVECEXP (addr, 0, 0);
11767 addr = SET_SRC (addr);
11772 extract_insn_cached (insn);
11773 for (i = recog_data.n_operands - 1; i >= 0; --i)
11774 if (GET_CODE (recog_data.operand[i]) == MEM)
11776 addr = XEXP (recog_data.operand[i], 0);
11783 return modified_in_p (addr, dep_insn);
11787 ix86_adjust_cost (insn, link, dep_insn, cost)
11788 rtx insn, link, dep_insn;
11791 enum attr_type insn_type, dep_insn_type;
11792 enum attr_memory memory, dep_memory;
11794 int dep_insn_code_number;
11796 /* Anti and output dependencies have zero cost on all CPUs. */
11797 if (REG_NOTE_KIND (link) != 0)
11800 dep_insn_code_number = recog_memoized (dep_insn);
11802 /* If we can't recognize the insns, we can't really do anything. */
11803 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11806 insn_type = get_attr_type (insn);
11807 dep_insn_type = get_attr_type (dep_insn);
11811 case PROCESSOR_PENTIUM:
11812 /* Address Generation Interlock adds a cycle of latency. */
11813 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11816 /* ??? Compares pair with jump/setcc. */
11817 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11820 /* Floating point stores require value to be ready one cycle earlier. */
11821 if (insn_type == TYPE_FMOV
11822 && get_attr_memory (insn) == MEMORY_STORE
11823 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11827 case PROCESSOR_PENTIUMPRO:
11828 memory = get_attr_memory (insn);
11829 dep_memory = get_attr_memory (dep_insn);
11831 /* Since we can't represent delayed latencies of load+operation,
11832 increase the cost here for non-imov insns. */
11833 if (dep_insn_type != TYPE_IMOV
11834 && dep_insn_type != TYPE_FMOV
11835 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11838 /* INT->FP conversion is expensive. */
11839 if (get_attr_fp_int_src (dep_insn))
11842 /* There is one cycle extra latency between an FP op and a store. */
11843 if (insn_type == TYPE_FMOV
11844 && (set = single_set (dep_insn)) != NULL_RTX
11845 && (set2 = single_set (insn)) != NULL_RTX
11846 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11847 && GET_CODE (SET_DEST (set2)) == MEM)
11850 /* Show ability of reorder buffer to hide latency of load by executing
11851 in parallel with previous instruction in case
11852 previous instruction is not needed to compute the address. */
11853 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11854 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11856 /* Claim moves to take one cycle, as core can issue one load
11857 at time and the next load can start cycle later. */
11858 if (dep_insn_type == TYPE_IMOV
11859 || dep_insn_type == TYPE_FMOV)
11867 memory = get_attr_memory (insn);
11868 dep_memory = get_attr_memory (dep_insn);
11869 /* The esp dependency is resolved before the instruction is really
11871 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11872 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11875 /* Since we can't represent delayed latencies of load+operation,
11876 increase the cost here for non-imov insns. */
11877 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11878 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11880 /* INT->FP conversion is expensive. */
11881 if (get_attr_fp_int_src (dep_insn))
11884 /* Show ability of reorder buffer to hide latency of load by executing
11885 in parallel with previous instruction in case
11886 previous instruction is not needed to compute the address. */
11887 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11888 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11890 /* Claim moves to take one cycle, as core can issue one load
11891 at time and the next load can start cycle later. */
11892 if (dep_insn_type == TYPE_IMOV
11893 || dep_insn_type == TYPE_FMOV)
11902 case PROCESSOR_ATHLON:
11904 memory = get_attr_memory (insn);
11905 dep_memory = get_attr_memory (dep_insn);
11907 /* Show ability of reorder buffer to hide latency of load by executing
11908 in parallel with previous instruction in case
11909 previous instruction is not needed to compute the address. */
11910 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11911 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11913 /* Claim moves to take one cycle, as core can issue one load
11914 at time and the next load can start cycle later. */
11915 if (dep_insn_type == TYPE_IMOV
11916 || dep_insn_type == TYPE_FMOV)
11918 else if (cost >= 3)
11933 struct ppro_sched_data
11936 int issued_this_cycle;
11940 static enum attr_ppro_uops
11941 ix86_safe_ppro_uops (insn)
11944 if (recog_memoized (insn) >= 0)
11945 return get_attr_ppro_uops (insn);
11947 return PPRO_UOPS_MANY;
11951 ix86_dump_ppro_packet (dump)
11954 if (ix86_sched_data.ppro.decode[0])
11956 fprintf (dump, "PPRO packet: %d",
11957 INSN_UID (ix86_sched_data.ppro.decode[0]));
11958 if (ix86_sched_data.ppro.decode[1])
11959 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11960 if (ix86_sched_data.ppro.decode[2])
11961 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11962 fputc ('\n', dump);
11966 /* We're beginning a new block. Initialize data structures as necessary. */
11969 ix86_sched_init (dump, sched_verbose, veclen)
11970 FILE *dump ATTRIBUTE_UNUSED;
11971 int sched_verbose ATTRIBUTE_UNUSED;
11972 int veclen ATTRIBUTE_UNUSED;
11974 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11977 /* Shift INSN to SLOT, and shift everything else down. */
11980 ix86_reorder_insn (insnp, slot)
11987 insnp[0] = insnp[1];
11988 while (++insnp != slot);
11994 ix86_sched_reorder_ppro (ready, e_ready)
11999 enum attr_ppro_uops cur_uops;
12000 int issued_this_cycle;
12004 /* At this point .ppro.decode contains the state of the three
12005 decoders from last "cycle". That is, those insns that were
12006 actually independent. But here we're scheduling for the
12007 decoder, and we may find things that are decodable in the
12010 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12011 issued_this_cycle = 0;
12014 cur_uops = ix86_safe_ppro_uops (*insnp);
12016 /* If the decoders are empty, and we've a complex insn at the
12017 head of the priority queue, let it issue without complaint. */
12018 if (decode[0] == NULL)
12020 if (cur_uops == PPRO_UOPS_MANY)
12022 decode[0] = *insnp;
12026 /* Otherwise, search for a 2-4 uop unsn to issue. */
12027 while (cur_uops != PPRO_UOPS_FEW)
12029 if (insnp == ready)
12031 cur_uops = ix86_safe_ppro_uops (*--insnp);
12034 /* If so, move it to the head of the line. */
12035 if (cur_uops == PPRO_UOPS_FEW)
12036 ix86_reorder_insn (insnp, e_ready);
12038 /* Issue the head of the queue. */
12039 issued_this_cycle = 1;
12040 decode[0] = *e_ready--;
12043 /* Look for simple insns to fill in the other two slots. */
12044 for (i = 1; i < 3; ++i)
12045 if (decode[i] == NULL)
12047 if (ready > e_ready)
12051 cur_uops = ix86_safe_ppro_uops (*insnp);
12052 while (cur_uops != PPRO_UOPS_ONE)
12054 if (insnp == ready)
12056 cur_uops = ix86_safe_ppro_uops (*--insnp);
12059 /* Found one. Move it to the head of the queue and issue it. */
12060 if (cur_uops == PPRO_UOPS_ONE)
12062 ix86_reorder_insn (insnp, e_ready);
12063 decode[i] = *e_ready--;
12064 issued_this_cycle++;
12068 /* ??? Didn't find one. Ideally, here we would do a lazy split
12069 of 2-uop insns, issue one and queue the other. */
12073 if (issued_this_cycle == 0)
12074 issued_this_cycle = 1;
12075 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12078 /* We are about to being issuing insns for this clock cycle.
12079 Override the default sort algorithm to better slot instructions. */
12081 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12082 FILE *dump ATTRIBUTE_UNUSED;
12083 int sched_verbose ATTRIBUTE_UNUSED;
12086 int clock_var ATTRIBUTE_UNUSED;
12088 int n_ready = *n_readyp;
12089 rtx *e_ready = ready + n_ready - 1;
12091 /* Make sure to go ahead and initialize key items in
12092 ix86_sched_data if we are not going to bother trying to
12093 reorder the ready queue. */
12096 ix86_sched_data.ppro.issued_this_cycle = 1;
12105 case PROCESSOR_PENTIUMPRO:
12106 ix86_sched_reorder_ppro (ready, e_ready);
12111 return ix86_issue_rate ();
12114 /* We are about to issue INSN. Return the number of insns left on the
12115 ready queue that can be issued this cycle. */
12118 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12122 int can_issue_more;
12128 return can_issue_more - 1;
12130 case PROCESSOR_PENTIUMPRO:
12132 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12134 if (uops == PPRO_UOPS_MANY)
12137 ix86_dump_ppro_packet (dump);
12138 ix86_sched_data.ppro.decode[0] = insn;
12139 ix86_sched_data.ppro.decode[1] = NULL;
12140 ix86_sched_data.ppro.decode[2] = NULL;
12142 ix86_dump_ppro_packet (dump);
12143 ix86_sched_data.ppro.decode[0] = NULL;
12145 else if (uops == PPRO_UOPS_FEW)
12148 ix86_dump_ppro_packet (dump);
12149 ix86_sched_data.ppro.decode[0] = insn;
12150 ix86_sched_data.ppro.decode[1] = NULL;
12151 ix86_sched_data.ppro.decode[2] = NULL;
12155 for (i = 0; i < 3; ++i)
12156 if (ix86_sched_data.ppro.decode[i] == NULL)
12158 ix86_sched_data.ppro.decode[i] = insn;
12166 ix86_dump_ppro_packet (dump);
12167 ix86_sched_data.ppro.decode[0] = NULL;
12168 ix86_sched_data.ppro.decode[1] = NULL;
12169 ix86_sched_data.ppro.decode[2] = NULL;
12173 return --ix86_sched_data.ppro.issued_this_cycle;
12178 ia32_use_dfa_pipeline_interface ()
12180 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12185 /* How many alternative schedules to try. This should be as wide as the
12186 scheduling freedom in the DFA, but no wider. Making this value too
12187 large results extra work for the scheduler. */
12190 ia32_multipass_dfa_lookahead ()
12192 if (ix86_cpu == PROCESSOR_PENTIUM)
12199 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12200 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12204 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12206 rtx dstref, srcref, dstreg, srcreg;
12210 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12212 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12216 /* Subroutine of above to actually do the updating by recursively walking
12220 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12222 rtx dstref, srcref, dstreg, srcreg;
12224 enum rtx_code code = GET_CODE (x);
12225 const char *format_ptr = GET_RTX_FORMAT (code);
12228 if (code == MEM && XEXP (x, 0) == dstreg)
12229 MEM_COPY_ATTRIBUTES (x, dstref);
12230 else if (code == MEM && XEXP (x, 0) == srcreg)
12231 MEM_COPY_ATTRIBUTES (x, srcref);
12233 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12235 if (*format_ptr == 'e')
12236 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12238 else if (*format_ptr == 'E')
12239 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12240 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12245 /* Compute the alignment given to a constant that is being placed in memory.
12246 EXP is the constant and ALIGN is the alignment that the object would
12248 The value of this function is used instead of that alignment to align
12252 ix86_constant_alignment (exp, align)
12256 if (TREE_CODE (exp) == REAL_CST)
12258 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12260 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12263 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12270 /* Compute the alignment for a static variable.
12271 TYPE is the data type, and ALIGN is the alignment that
12272 the object would ordinarily have. The value of this function is used
12273 instead of that alignment to align the object. */
12276 ix86_data_alignment (type, align)
12280 if (AGGREGATE_TYPE_P (type)
12281 && TYPE_SIZE (type)
12282 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12283 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12284 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12287 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12288 to 16byte boundary. */
12291 if (AGGREGATE_TYPE_P (type)
12292 && TYPE_SIZE (type)
12293 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12294 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12295 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12299 if (TREE_CODE (type) == ARRAY_TYPE)
12301 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12303 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12306 else if (TREE_CODE (type) == COMPLEX_TYPE)
12309 if (TYPE_MODE (type) == DCmode && align < 64)
12311 if (TYPE_MODE (type) == XCmode && align < 128)
12314 else if ((TREE_CODE (type) == RECORD_TYPE
12315 || TREE_CODE (type) == UNION_TYPE
12316 || TREE_CODE (type) == QUAL_UNION_TYPE)
12317 && TYPE_FIELDS (type))
12319 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12321 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12324 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12325 || TREE_CODE (type) == INTEGER_TYPE)
12327 if (TYPE_MODE (type) == DFmode && align < 64)
12329 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12336 /* Compute the alignment for a local variable.
12337 TYPE is the data type, and ALIGN is the alignment that
12338 the object would ordinarily have. The value of this macro is used
12339 instead of that alignment to align the object. */
12342 ix86_local_alignment (type, align)
12346 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12347 to 16byte boundary. */
12350 if (AGGREGATE_TYPE_P (type)
12351 && TYPE_SIZE (type)
12352 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12353 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12354 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12357 if (TREE_CODE (type) == ARRAY_TYPE)
12359 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12361 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12364 else if (TREE_CODE (type) == COMPLEX_TYPE)
12366 if (TYPE_MODE (type) == DCmode && align < 64)
12368 if (TYPE_MODE (type) == XCmode && align < 128)
12371 else if ((TREE_CODE (type) == RECORD_TYPE
12372 || TREE_CODE (type) == UNION_TYPE
12373 || TREE_CODE (type) == QUAL_UNION_TYPE)
12374 && TYPE_FIELDS (type))
12376 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12378 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12381 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12382 || TREE_CODE (type) == INTEGER_TYPE)
12385 if (TYPE_MODE (type) == DFmode && align < 64)
12387 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12393 /* Emit RTL insns to initialize the variable parts of a trampoline.
12394 FNADDR is an RTX for the address of the function's pure code.
12395 CXT is an RTX for the static chain value for the function. */
12397 x86_initialize_trampoline (tramp, fnaddr, cxt)
12398 rtx tramp, fnaddr, cxt;
12402 /* Compute offset from the end of the jmp to the target function. */
12403 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12404 plus_constant (tramp, 10),
12405 NULL_RTX, 1, OPTAB_DIRECT);
12406 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12407 gen_int_mode (0xb9, QImode));
12408 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12409 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12410 gen_int_mode (0xe9, QImode));
12411 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12416 /* Try to load address using shorter movl instead of movabs.
12417 We may want to support movq for kernel mode, but kernel does not use
12418 trampolines at the moment. */
12419 if (x86_64_zero_extended_value (fnaddr))
12421 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12422 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12423 gen_int_mode (0xbb41, HImode));
12424 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12425 gen_lowpart (SImode, fnaddr));
12430 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12431 gen_int_mode (0xbb49, HImode));
12432 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12436 /* Load static chain using movabs to r10. */
12437 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12438 gen_int_mode (0xba49, HImode));
12439 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12442 /* Jump to the r11 */
12443 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12444 gen_int_mode (0xff49, HImode));
12445 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12446 gen_int_mode (0xe3, QImode));
12448 if (offset > TRAMPOLINE_SIZE)
12452 #ifdef TRANSFER_FROM_TRAMPOLINE
12453 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12454 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12458 #define def_builtin(MASK, NAME, TYPE, CODE) \
12460 if ((MASK) & target_flags) \
12461 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12462 NULL, NULL_TREE); \
12465 struct builtin_description
12467 const unsigned int mask;
12468 const enum insn_code icode;
12469 const char *const name;
12470 const enum ix86_builtins code;
12471 const enum rtx_code comparison;
12472 const unsigned int flag;
12475 /* Used for builtins that are enabled both by -msse and -msse2. */
12476 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12478 static const struct builtin_description bdesc_comi[] =
12480 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12481 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12482 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12483 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12484 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12485 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12486 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12487 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12488 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12489 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12490 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12491 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12492 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12493 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12494 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12495 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12496 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12497 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12498 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12499 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12500 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12501 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12502 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12503 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12506 static const struct builtin_description bdesc_2arg[] =
12509 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12510 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12511 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12512 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12513 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12514 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12515 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12516 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12518 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12519 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12520 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12521 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12522 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12523 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12524 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12525 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12526 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12527 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12528 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12529 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12530 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12531 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12532 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12533 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12534 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12535 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12536 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12537 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12539 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12540 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12541 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12542 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12544 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12545 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12546 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12547 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12549 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12550 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12551 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12552 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12553 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12556 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12557 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12558 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12559 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12560 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12561 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12563 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12564 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12565 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12566 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12567 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12568 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12569 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12570 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12572 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12573 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12574 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12576 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12577 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12578 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12579 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12581 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12582 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12584 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12585 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12586 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12587 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12588 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12589 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12591 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12592 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12593 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12594 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12596 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12597 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12598 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12599 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12600 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12601 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12604 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12605 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12606 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12608 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12609 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12611 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12612 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12613 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12614 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12615 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12616 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12618 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12619 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12620 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12621 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12622 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12623 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12625 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12626 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12627 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12628 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12630 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12631 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12634 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12635 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12636 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12637 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12638 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12639 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12640 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12641 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12643 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12644 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12645 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12646 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12647 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12648 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12649 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12650 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12651 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12652 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12653 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12654 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12655 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12656 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12657 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12658 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12659 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12660 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12661 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12662 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12664 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12665 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12667 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12669 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12670 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12671 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12672 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12674 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12675 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12676 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12679 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12680 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12681 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12682 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12683 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12684 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12685 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12686 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12688 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12689 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12690 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12691 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12692 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12693 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12694 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12695 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12697 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12698 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12700 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12702 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12703 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12704 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12705 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12707 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12708 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12710 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12711 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12712 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12713 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12714 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12715 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12717 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12718 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12719 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12720 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12722 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12723 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12724 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12725 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12726 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12729 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12732 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12736 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12739 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12741 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12742 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12743 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12746 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12747 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12748 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12749 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12750 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12752 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12753 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12754 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12755 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12757 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12759 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12760 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12761 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12764 static const struct builtin_description bdesc_1arg[] =
12766 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12767 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12769 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12770 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12771 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12773 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12774 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12775 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12776 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12779 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12783 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12785 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12786 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12788 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12790 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12791 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12792 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12794 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12796 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12797 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12799 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12800 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12801 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12807 ix86_init_builtins ()
12810 ix86_init_mmx_sse_builtins ();
12813 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12814 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12817 ix86_init_mmx_sse_builtins ()
12819 const struct builtin_description * d;
12822 tree pchar_type_node = build_pointer_type (char_type_node);
12823 tree pcchar_type_node = build_pointer_type (
12824 build_type_variant (char_type_node, 1, 0));
12825 tree pfloat_type_node = build_pointer_type (float_type_node);
12826 tree pcfloat_type_node = build_pointer_type (
12827 build_type_variant (float_type_node, 1, 0));
12828 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12829 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12830 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12833 tree int_ftype_v4sf_v4sf
12834 = build_function_type_list (integer_type_node,
12835 V4SF_type_node, V4SF_type_node, NULL_TREE);
12836 tree v4si_ftype_v4sf_v4sf
12837 = build_function_type_list (V4SI_type_node,
12838 V4SF_type_node, V4SF_type_node, NULL_TREE);
12839 /* MMX/SSE/integer conversions. */
12840 tree int_ftype_v4sf
12841 = build_function_type_list (integer_type_node,
12842 V4SF_type_node, NULL_TREE);
12843 tree int_ftype_v8qi
12844 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12845 tree v4sf_ftype_v4sf_int
12846 = build_function_type_list (V4SF_type_node,
12847 V4SF_type_node, integer_type_node, NULL_TREE);
12848 tree v4sf_ftype_v4sf_v2si
12849 = build_function_type_list (V4SF_type_node,
12850 V4SF_type_node, V2SI_type_node, NULL_TREE);
12851 tree int_ftype_v4hi_int
12852 = build_function_type_list (integer_type_node,
12853 V4HI_type_node, integer_type_node, NULL_TREE);
12854 tree v4hi_ftype_v4hi_int_int
12855 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12856 integer_type_node, integer_type_node,
12858 /* Miscellaneous. */
12859 tree v8qi_ftype_v4hi_v4hi
12860 = build_function_type_list (V8QI_type_node,
12861 V4HI_type_node, V4HI_type_node, NULL_TREE);
12862 tree v4hi_ftype_v2si_v2si
12863 = build_function_type_list (V4HI_type_node,
12864 V2SI_type_node, V2SI_type_node, NULL_TREE);
12865 tree v4sf_ftype_v4sf_v4sf_int
12866 = build_function_type_list (V4SF_type_node,
12867 V4SF_type_node, V4SF_type_node,
12868 integer_type_node, NULL_TREE);
12869 tree v2si_ftype_v4hi_v4hi
12870 = build_function_type_list (V2SI_type_node,
12871 V4HI_type_node, V4HI_type_node, NULL_TREE);
12872 tree v4hi_ftype_v4hi_int
12873 = build_function_type_list (V4HI_type_node,
12874 V4HI_type_node, integer_type_node, NULL_TREE);
12875 tree v4hi_ftype_v4hi_di
12876 = build_function_type_list (V4HI_type_node,
12877 V4HI_type_node, long_long_unsigned_type_node,
12879 tree v2si_ftype_v2si_di
12880 = build_function_type_list (V2SI_type_node,
12881 V2SI_type_node, long_long_unsigned_type_node,
12883 tree void_ftype_void
12884 = build_function_type (void_type_node, void_list_node);
12885 tree void_ftype_unsigned
12886 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12887 tree unsigned_ftype_void
12888 = build_function_type (unsigned_type_node, void_list_node);
12890 = build_function_type (long_long_unsigned_type_node, void_list_node);
12891 tree v4sf_ftype_void
12892 = build_function_type (V4SF_type_node, void_list_node);
12893 tree v2si_ftype_v4sf
12894 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12895 /* Loads/stores. */
12896 tree void_ftype_v8qi_v8qi_pchar
12897 = build_function_type_list (void_type_node,
12898 V8QI_type_node, V8QI_type_node,
12899 pchar_type_node, NULL_TREE);
12900 tree v4sf_ftype_pcfloat
12901 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12902 /* @@@ the type is bogus */
12903 tree v4sf_ftype_v4sf_pv2si
12904 = build_function_type_list (V4SF_type_node,
12905 V4SF_type_node, pv2si_type_node, NULL_TREE);
12906 tree void_ftype_pv2si_v4sf
12907 = build_function_type_list (void_type_node,
12908 pv2si_type_node, V4SF_type_node, NULL_TREE);
12909 tree void_ftype_pfloat_v4sf
12910 = build_function_type_list (void_type_node,
12911 pfloat_type_node, V4SF_type_node, NULL_TREE);
12912 tree void_ftype_pdi_di
12913 = build_function_type_list (void_type_node,
12914 pdi_type_node, long_long_unsigned_type_node,
12916 tree void_ftype_pv2di_v2di
12917 = build_function_type_list (void_type_node,
12918 pv2di_type_node, V2DI_type_node, NULL_TREE);
12919 /* Normal vector unops. */
12920 tree v4sf_ftype_v4sf
12921 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12923 /* Normal vector binops. */
12924 tree v4sf_ftype_v4sf_v4sf
12925 = build_function_type_list (V4SF_type_node,
12926 V4SF_type_node, V4SF_type_node, NULL_TREE);
12927 tree v8qi_ftype_v8qi_v8qi
12928 = build_function_type_list (V8QI_type_node,
12929 V8QI_type_node, V8QI_type_node, NULL_TREE);
12930 tree v4hi_ftype_v4hi_v4hi
12931 = build_function_type_list (V4HI_type_node,
12932 V4HI_type_node, V4HI_type_node, NULL_TREE);
12933 tree v2si_ftype_v2si_v2si
12934 = build_function_type_list (V2SI_type_node,
12935 V2SI_type_node, V2SI_type_node, NULL_TREE);
12936 tree di_ftype_di_di
12937 = build_function_type_list (long_long_unsigned_type_node,
12938 long_long_unsigned_type_node,
12939 long_long_unsigned_type_node, NULL_TREE);
12941 tree v2si_ftype_v2sf
12942 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12943 tree v2sf_ftype_v2si
12944 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12945 tree v2si_ftype_v2si
12946 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12947 tree v2sf_ftype_v2sf
12948 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12949 tree v2sf_ftype_v2sf_v2sf
12950 = build_function_type_list (V2SF_type_node,
12951 V2SF_type_node, V2SF_type_node, NULL_TREE);
12952 tree v2si_ftype_v2sf_v2sf
12953 = build_function_type_list (V2SI_type_node,
12954 V2SF_type_node, V2SF_type_node, NULL_TREE);
12955 tree pint_type_node = build_pointer_type (integer_type_node);
12956 tree pcint_type_node = build_pointer_type (
12957 build_type_variant (integer_type_node, 1, 0));
12958 tree pdouble_type_node = build_pointer_type (double_type_node);
12959 tree pcdouble_type_node = build_pointer_type (
12960 build_type_variant (double_type_node, 1, 0));
12961 tree int_ftype_v2df_v2df
12962 = build_function_type_list (integer_type_node,
12963 V2DF_type_node, V2DF_type_node, NULL_TREE);
12966 = build_function_type (intTI_type_node, void_list_node);
12967 tree v2di_ftype_void
12968 = build_function_type (V2DI_type_node, void_list_node);
12969 tree ti_ftype_ti_ti
12970 = build_function_type_list (intTI_type_node,
12971 intTI_type_node, intTI_type_node, NULL_TREE);
12972 tree void_ftype_pcvoid
12973 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12975 = build_function_type_list (V2DI_type_node,
12976 long_long_unsigned_type_node, NULL_TREE);
12978 = build_function_type_list (long_long_unsigned_type_node,
12979 V2DI_type_node, NULL_TREE);
12980 tree v4sf_ftype_v4si
12981 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12982 tree v4si_ftype_v4sf
12983 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12984 tree v2df_ftype_v4si
12985 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12986 tree v4si_ftype_v2df
12987 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12988 tree v2si_ftype_v2df
12989 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12990 tree v4sf_ftype_v2df
12991 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12992 tree v2df_ftype_v2si
12993 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12994 tree v2df_ftype_v4sf
12995 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12996 tree int_ftype_v2df
12997 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12998 tree v2df_ftype_v2df_int
12999 = build_function_type_list (V2DF_type_node,
13000 V2DF_type_node, integer_type_node, NULL_TREE);
13001 tree v4sf_ftype_v4sf_v2df
13002 = build_function_type_list (V4SF_type_node,
13003 V4SF_type_node, V2DF_type_node, NULL_TREE);
13004 tree v2df_ftype_v2df_v4sf
13005 = build_function_type_list (V2DF_type_node,
13006 V2DF_type_node, V4SF_type_node, NULL_TREE);
13007 tree v2df_ftype_v2df_v2df_int
13008 = build_function_type_list (V2DF_type_node,
13009 V2DF_type_node, V2DF_type_node,
13012 tree v2df_ftype_v2df_pv2si
13013 = build_function_type_list (V2DF_type_node,
13014 V2DF_type_node, pv2si_type_node, NULL_TREE);
13015 tree void_ftype_pv2si_v2df
13016 = build_function_type_list (void_type_node,
13017 pv2si_type_node, V2DF_type_node, NULL_TREE);
13018 tree void_ftype_pdouble_v2df
13019 = build_function_type_list (void_type_node,
13020 pdouble_type_node, V2DF_type_node, NULL_TREE);
13021 tree void_ftype_pint_int
13022 = build_function_type_list (void_type_node,
13023 pint_type_node, integer_type_node, NULL_TREE);
13024 tree void_ftype_v16qi_v16qi_pchar
13025 = build_function_type_list (void_type_node,
13026 V16QI_type_node, V16QI_type_node,
13027 pchar_type_node, NULL_TREE);
13028 tree v2df_ftype_pcdouble
13029 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13030 tree v2df_ftype_v2df_v2df
13031 = build_function_type_list (V2DF_type_node,
13032 V2DF_type_node, V2DF_type_node, NULL_TREE);
13033 tree v16qi_ftype_v16qi_v16qi
13034 = build_function_type_list (V16QI_type_node,
13035 V16QI_type_node, V16QI_type_node, NULL_TREE);
13036 tree v8hi_ftype_v8hi_v8hi
13037 = build_function_type_list (V8HI_type_node,
13038 V8HI_type_node, V8HI_type_node, NULL_TREE);
13039 tree v4si_ftype_v4si_v4si
13040 = build_function_type_list (V4SI_type_node,
13041 V4SI_type_node, V4SI_type_node, NULL_TREE);
13042 tree v2di_ftype_v2di_v2di
13043 = build_function_type_list (V2DI_type_node,
13044 V2DI_type_node, V2DI_type_node, NULL_TREE);
13045 tree v2di_ftype_v2df_v2df
13046 = build_function_type_list (V2DI_type_node,
13047 V2DF_type_node, V2DF_type_node, NULL_TREE);
13048 tree v2df_ftype_v2df
13049 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13050 tree v2df_ftype_double
13051 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13052 tree v2df_ftype_double_double
13053 = build_function_type_list (V2DF_type_node,
13054 double_type_node, double_type_node, NULL_TREE);
13055 tree int_ftype_v8hi_int
13056 = build_function_type_list (integer_type_node,
13057 V8HI_type_node, integer_type_node, NULL_TREE);
13058 tree v8hi_ftype_v8hi_int_int
13059 = build_function_type_list (V8HI_type_node,
13060 V8HI_type_node, integer_type_node,
13061 integer_type_node, NULL_TREE);
13062 tree v2di_ftype_v2di_int
13063 = build_function_type_list (V2DI_type_node,
13064 V2DI_type_node, integer_type_node, NULL_TREE);
13065 tree v4si_ftype_v4si_int
13066 = build_function_type_list (V4SI_type_node,
13067 V4SI_type_node, integer_type_node, NULL_TREE);
13068 tree v8hi_ftype_v8hi_int
13069 = build_function_type_list (V8HI_type_node,
13070 V8HI_type_node, integer_type_node, NULL_TREE);
13071 tree v8hi_ftype_v8hi_v2di
13072 = build_function_type_list (V8HI_type_node,
13073 V8HI_type_node, V2DI_type_node, NULL_TREE);
13074 tree v4si_ftype_v4si_v2di
13075 = build_function_type_list (V4SI_type_node,
13076 V4SI_type_node, V2DI_type_node, NULL_TREE);
13077 tree v4si_ftype_v8hi_v8hi
13078 = build_function_type_list (V4SI_type_node,
13079 V8HI_type_node, V8HI_type_node, NULL_TREE);
13080 tree di_ftype_v8qi_v8qi
13081 = build_function_type_list (long_long_unsigned_type_node,
13082 V8QI_type_node, V8QI_type_node, NULL_TREE);
13083 tree v2di_ftype_v16qi_v16qi
13084 = build_function_type_list (V2DI_type_node,
13085 V16QI_type_node, V16QI_type_node, NULL_TREE);
13086 tree int_ftype_v16qi
13087 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13088 tree v16qi_ftype_pcchar
13089 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13090 tree void_ftype_pchar_v16qi
13091 = build_function_type_list (void_type_node,
13092 pchar_type_node, V16QI_type_node, NULL_TREE);
13093 tree v4si_ftype_pcint
13094 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13095 tree void_ftype_pcint_v4si
13096 = build_function_type_list (void_type_node,
13097 pcint_type_node, V4SI_type_node, NULL_TREE);
13098 tree v2di_ftype_v2di
13099 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13101 /* Add all builtins that are more or less simple operations on two
13103 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13105 /* Use one of the operands; the target can have a different mode for
13106 mask-generating compares. */
13107 enum machine_mode mode;
13112 mode = insn_data[d->icode].operand[1].mode;
13117 type = v16qi_ftype_v16qi_v16qi;
13120 type = v8hi_ftype_v8hi_v8hi;
13123 type = v4si_ftype_v4si_v4si;
13126 type = v2di_ftype_v2di_v2di;
13129 type = v2df_ftype_v2df_v2df;
13132 type = ti_ftype_ti_ti;
13135 type = v4sf_ftype_v4sf_v4sf;
13138 type = v8qi_ftype_v8qi_v8qi;
13141 type = v4hi_ftype_v4hi_v4hi;
13144 type = v2si_ftype_v2si_v2si;
13147 type = di_ftype_di_di;
13154 /* Override for comparisons. */
13155 if (d->icode == CODE_FOR_maskcmpv4sf3
13156 || d->icode == CODE_FOR_maskncmpv4sf3
13157 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13158 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13159 type = v4si_ftype_v4sf_v4sf;
13161 if (d->icode == CODE_FOR_maskcmpv2df3
13162 || d->icode == CODE_FOR_maskncmpv2df3
13163 || d->icode == CODE_FOR_vmmaskcmpv2df3
13164 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13165 type = v2di_ftype_v2df_v2df;
13167 def_builtin (d->mask, d->name, type, d->code);
13170 /* Add the remaining MMX insns with somewhat more complicated types. */
13171 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13172 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13173 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13174 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13175 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13177 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13178 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13179 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13181 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13182 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13184 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13185 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13187 /* comi/ucomi insns. */
13188 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13189 if (d->mask == MASK_SSE2)
13190 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13192 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13194 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13195 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13196 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13198 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13199 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13200 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13201 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13202 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13203 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13204 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13205 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13207 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13208 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13210 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13212 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13213 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13214 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13215 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13216 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13217 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13219 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13220 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13221 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13222 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13224 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13225 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13226 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13227 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13229 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13231 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13233 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13234 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13235 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13236 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13237 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13238 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13240 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13242 /* Original 3DNow! */
13243 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13244 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13245 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13246 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13247 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13248 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13249 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13250 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13251 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13252 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13253 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13254 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13255 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13256 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13257 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13258 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13259 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13260 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13261 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13262 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13264 /* 3DNow! extension as used in the Athlon CPU. */
13265 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13266 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13267 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13268 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13269 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13270 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13272 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13275 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13276 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13278 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13279 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13280 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13282 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13283 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13284 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13285 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13286 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13287 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13289 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13290 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13291 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13292 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13294 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13295 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13296 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13297 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13298 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13300 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13301 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13302 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13303 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13305 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13306 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13308 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13310 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13311 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13313 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13314 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13315 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13316 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13317 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13319 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13321 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13322 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13324 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13325 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13326 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13328 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13329 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13330 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13332 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13333 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13334 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13335 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13336 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13337 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13338 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13340 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13341 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13342 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13344 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13345 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13346 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13347 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13348 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13349 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13350 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13352 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13354 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13355 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13356 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13358 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13359 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13360 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13362 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13363 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13365 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13366 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13367 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13368 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13370 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13371 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13372 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13373 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13375 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13376 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13378 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13381 /* Errors in the source file can cause expand_expr to return const0_rtx
13382 where we expect a vector. To avoid crashing, use one of the vector
13383 clear instructions. */
13385 safe_vector_operand (x, mode)
13387 enum machine_mode mode;
13389 if (x != const0_rtx)
13391 x = gen_reg_rtx (mode);
13393 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13394 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13395 : gen_rtx_SUBREG (DImode, x, 0)));
13397 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13398 : gen_rtx_SUBREG (V4SFmode, x, 0),
13399 CONST0_RTX (V4SFmode)));
13403 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13406 ix86_expand_binop_builtin (icode, arglist, target)
13407 enum insn_code icode;
13412 tree arg0 = TREE_VALUE (arglist);
13413 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13414 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13415 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13416 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13417 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13418 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13420 if (VECTOR_MODE_P (mode0))
13421 op0 = safe_vector_operand (op0, mode0);
13422 if (VECTOR_MODE_P (mode1))
13423 op1 = safe_vector_operand (op1, mode1);
13426 || GET_MODE (target) != tmode
13427 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13428 target = gen_reg_rtx (tmode);
13430 /* In case the insn wants input operands in modes different from
13431 the result, abort. */
13432 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13435 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13436 op0 = copy_to_mode_reg (mode0, op0);
13437 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13438 op1 = copy_to_mode_reg (mode1, op1);
13440 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13441 yet one of the two must not be a memory. This is normally enforced
13442 by expanders, but we didn't bother to create one here. */
13443 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13444 op0 = copy_to_mode_reg (mode0, op0);
13446 pat = GEN_FCN (icode) (target, op0, op1);
13453 /* Subroutine of ix86_expand_builtin to take care of stores. */
13456 ix86_expand_store_builtin (icode, arglist)
13457 enum insn_code icode;
13461 tree arg0 = TREE_VALUE (arglist);
13462 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13463 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13464 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13465 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13466 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13468 if (VECTOR_MODE_P (mode1))
13469 op1 = safe_vector_operand (op1, mode1);
13471 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13472 op1 = copy_to_mode_reg (mode1, op1);
13474 pat = GEN_FCN (icode) (op0, op1);
13480 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13483 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13484 enum insn_code icode;
13490 tree arg0 = TREE_VALUE (arglist);
13491 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13492 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13493 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13496 || GET_MODE (target) != tmode
13497 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13498 target = gen_reg_rtx (tmode);
13500 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13503 if (VECTOR_MODE_P (mode0))
13504 op0 = safe_vector_operand (op0, mode0);
13506 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13507 op0 = copy_to_mode_reg (mode0, op0);
13510 pat = GEN_FCN (icode) (target, op0);
13517 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13518 sqrtss, rsqrtss, rcpss. */
13521 ix86_expand_unop1_builtin (icode, arglist, target)
13522 enum insn_code icode;
13527 tree arg0 = TREE_VALUE (arglist);
13528 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13529 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13530 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13533 || GET_MODE (target) != tmode
13534 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13535 target = gen_reg_rtx (tmode);
13537 if (VECTOR_MODE_P (mode0))
13538 op0 = safe_vector_operand (op0, mode0);
13540 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13541 op0 = copy_to_mode_reg (mode0, op0);
13544 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13545 op1 = copy_to_mode_reg (mode0, op1);
13547 pat = GEN_FCN (icode) (target, op0, op1);
13554 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13557 ix86_expand_sse_compare (d, arglist, target)
13558 const struct builtin_description *d;
13563 tree arg0 = TREE_VALUE (arglist);
13564 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13565 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13566 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13568 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13569 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13570 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13571 enum rtx_code comparison = d->comparison;
13573 if (VECTOR_MODE_P (mode0))
13574 op0 = safe_vector_operand (op0, mode0);
13575 if (VECTOR_MODE_P (mode1))
13576 op1 = safe_vector_operand (op1, mode1);
13578 /* Swap operands if we have a comparison that isn't available in
13582 rtx tmp = gen_reg_rtx (mode1);
13583 emit_move_insn (tmp, op1);
13589 || GET_MODE (target) != tmode
13590 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13591 target = gen_reg_rtx (tmode);
13593 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13594 op0 = copy_to_mode_reg (mode0, op0);
13595 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13596 op1 = copy_to_mode_reg (mode1, op1);
13598 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13599 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13606 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13609 ix86_expand_sse_comi (d, arglist, target)
13610 const struct builtin_description *d;
13615 tree arg0 = TREE_VALUE (arglist);
13616 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13617 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13618 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13620 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13621 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13622 enum rtx_code comparison = d->comparison;
13624 if (VECTOR_MODE_P (mode0))
13625 op0 = safe_vector_operand (op0, mode0);
13626 if (VECTOR_MODE_P (mode1))
13627 op1 = safe_vector_operand (op1, mode1);
13629 /* Swap operands if we have a comparison that isn't available in
13638 target = gen_reg_rtx (SImode);
13639 emit_move_insn (target, const0_rtx);
13640 target = gen_rtx_SUBREG (QImode, target, 0);
13642 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13643 op0 = copy_to_mode_reg (mode0, op0);
13644 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13645 op1 = copy_to_mode_reg (mode1, op1);
13647 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13648 pat = GEN_FCN (d->icode) (op0, op1);
13652 emit_insn (gen_rtx_SET (VOIDmode,
13653 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13654 gen_rtx_fmt_ee (comparison, QImode,
13658 return SUBREG_REG (target);
13661 /* Expand an expression EXP that calls a built-in function,
13662 with result going to TARGET if that's convenient
13663 (and in mode MODE if that's convenient).
13664 SUBTARGET may be used as the target for computing one of EXP's operands.
13665 IGNORE is nonzero if the value is to be ignored. */
13668 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13671 rtx subtarget ATTRIBUTE_UNUSED;
13672 enum machine_mode mode ATTRIBUTE_UNUSED;
13673 int ignore ATTRIBUTE_UNUSED;
13675 const struct builtin_description *d;
13677 enum insn_code icode;
13678 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13679 tree arglist = TREE_OPERAND (exp, 1);
13680 tree arg0, arg1, arg2;
13681 rtx op0, op1, op2, pat;
13682 enum machine_mode tmode, mode0, mode1, mode2;
13683 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13687 case IX86_BUILTIN_EMMS:
13688 emit_insn (gen_emms ());
13691 case IX86_BUILTIN_SFENCE:
13692 emit_insn (gen_sfence ());
13695 case IX86_BUILTIN_PEXTRW:
13696 case IX86_BUILTIN_PEXTRW128:
13697 icode = (fcode == IX86_BUILTIN_PEXTRW
13698 ? CODE_FOR_mmx_pextrw
13699 : CODE_FOR_sse2_pextrw);
13700 arg0 = TREE_VALUE (arglist);
13701 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13702 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13703 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13704 tmode = insn_data[icode].operand[0].mode;
13705 mode0 = insn_data[icode].operand[1].mode;
13706 mode1 = insn_data[icode].operand[2].mode;
13708 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13709 op0 = copy_to_mode_reg (mode0, op0);
13710 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13712 /* @@@ better error message */
13713 error ("selector must be an immediate");
13714 return gen_reg_rtx (tmode);
13717 || GET_MODE (target) != tmode
13718 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13719 target = gen_reg_rtx (tmode);
13720 pat = GEN_FCN (icode) (target, op0, op1);
13726 case IX86_BUILTIN_PINSRW:
13727 case IX86_BUILTIN_PINSRW128:
13728 icode = (fcode == IX86_BUILTIN_PINSRW
13729 ? CODE_FOR_mmx_pinsrw
13730 : CODE_FOR_sse2_pinsrw);
13731 arg0 = TREE_VALUE (arglist);
13732 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13733 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13734 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13735 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13736 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13737 tmode = insn_data[icode].operand[0].mode;
13738 mode0 = insn_data[icode].operand[1].mode;
13739 mode1 = insn_data[icode].operand[2].mode;
13740 mode2 = insn_data[icode].operand[3].mode;
13742 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13743 op0 = copy_to_mode_reg (mode0, op0);
13744 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13745 op1 = copy_to_mode_reg (mode1, op1);
13746 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13748 /* @@@ better error message */
13749 error ("selector must be an immediate");
13753 || GET_MODE (target) != tmode
13754 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13755 target = gen_reg_rtx (tmode);
13756 pat = GEN_FCN (icode) (target, op0, op1, op2);
13762 case IX86_BUILTIN_MASKMOVQ:
13763 case IX86_BUILTIN_MASKMOVDQU:
13764 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13765 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13766 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13767 : CODE_FOR_sse2_maskmovdqu));
13768 /* Note the arg order is different from the operand order. */
13769 arg1 = TREE_VALUE (arglist);
13770 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13771 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13772 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13773 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13774 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13775 mode0 = insn_data[icode].operand[0].mode;
13776 mode1 = insn_data[icode].operand[1].mode;
13777 mode2 = insn_data[icode].operand[2].mode;
13779 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13780 op0 = copy_to_mode_reg (mode0, op0);
13781 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13782 op1 = copy_to_mode_reg (mode1, op1);
13783 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13784 op2 = copy_to_mode_reg (mode2, op2);
13785 pat = GEN_FCN (icode) (op0, op1, op2);
13791 case IX86_BUILTIN_SQRTSS:
13792 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13793 case IX86_BUILTIN_RSQRTSS:
13794 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13795 case IX86_BUILTIN_RCPSS:
13796 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13798 case IX86_BUILTIN_LOADAPS:
13799 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13801 case IX86_BUILTIN_LOADUPS:
13802 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13804 case IX86_BUILTIN_STOREAPS:
13805 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13807 case IX86_BUILTIN_STOREUPS:
13808 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13810 case IX86_BUILTIN_LOADSS:
13811 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13813 case IX86_BUILTIN_STORESS:
13814 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13816 case IX86_BUILTIN_LOADHPS:
13817 case IX86_BUILTIN_LOADLPS:
13818 case IX86_BUILTIN_LOADHPD:
13819 case IX86_BUILTIN_LOADLPD:
13820 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13821 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13822 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13823 : CODE_FOR_sse2_movlpd);
13824 arg0 = TREE_VALUE (arglist);
13825 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13826 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13827 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13828 tmode = insn_data[icode].operand[0].mode;
13829 mode0 = insn_data[icode].operand[1].mode;
13830 mode1 = insn_data[icode].operand[2].mode;
13832 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13833 op0 = copy_to_mode_reg (mode0, op0);
13834 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13836 || GET_MODE (target) != tmode
13837 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13838 target = gen_reg_rtx (tmode);
13839 pat = GEN_FCN (icode) (target, op0, op1);
13845 case IX86_BUILTIN_STOREHPS:
13846 case IX86_BUILTIN_STORELPS:
13847 case IX86_BUILTIN_STOREHPD:
13848 case IX86_BUILTIN_STORELPD:
13849 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13850 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13851 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13852 : CODE_FOR_sse2_movlpd);
13853 arg0 = TREE_VALUE (arglist);
13854 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13855 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13856 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13857 mode0 = insn_data[icode].operand[1].mode;
13858 mode1 = insn_data[icode].operand[2].mode;
13860 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13861 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13862 op1 = copy_to_mode_reg (mode1, op1);
13864 pat = GEN_FCN (icode) (op0, op0, op1);
13870 case IX86_BUILTIN_MOVNTPS:
13871 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13872 case IX86_BUILTIN_MOVNTQ:
13873 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13875 case IX86_BUILTIN_LDMXCSR:
13876 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13877 target = assign_386_stack_local (SImode, 0);
13878 emit_move_insn (target, op0);
13879 emit_insn (gen_ldmxcsr (target));
13882 case IX86_BUILTIN_STMXCSR:
13883 target = assign_386_stack_local (SImode, 0);
13884 emit_insn (gen_stmxcsr (target));
13885 return copy_to_mode_reg (SImode, target);
13887 case IX86_BUILTIN_SHUFPS:
13888 case IX86_BUILTIN_SHUFPD:
13889 icode = (fcode == IX86_BUILTIN_SHUFPS
13890 ? CODE_FOR_sse_shufps
13891 : CODE_FOR_sse2_shufpd);
13892 arg0 = TREE_VALUE (arglist);
13893 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13894 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13895 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13896 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13897 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13898 tmode = insn_data[icode].operand[0].mode;
13899 mode0 = insn_data[icode].operand[1].mode;
13900 mode1 = insn_data[icode].operand[2].mode;
13901 mode2 = insn_data[icode].operand[3].mode;
13903 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13904 op0 = copy_to_mode_reg (mode0, op0);
13905 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13906 op1 = copy_to_mode_reg (mode1, op1);
13907 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13909 /* @@@ better error message */
13910 error ("mask must be an immediate");
13911 return gen_reg_rtx (tmode);
13914 || GET_MODE (target) != tmode
13915 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13916 target = gen_reg_rtx (tmode);
13917 pat = GEN_FCN (icode) (target, op0, op1, op2);
13923 case IX86_BUILTIN_PSHUFW:
13924 case IX86_BUILTIN_PSHUFD:
13925 case IX86_BUILTIN_PSHUFHW:
13926 case IX86_BUILTIN_PSHUFLW:
13927 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13928 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13929 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13930 : CODE_FOR_mmx_pshufw);
13931 arg0 = TREE_VALUE (arglist);
13932 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13933 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13934 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13935 tmode = insn_data[icode].operand[0].mode;
13936 mode1 = insn_data[icode].operand[1].mode;
13937 mode2 = insn_data[icode].operand[2].mode;
13939 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13940 op0 = copy_to_mode_reg (mode1, op0);
13941 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13943 /* @@@ better error message */
13944 error ("mask must be an immediate");
13948 || GET_MODE (target) != tmode
13949 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13950 target = gen_reg_rtx (tmode);
13951 pat = GEN_FCN (icode) (target, op0, op1);
13957 case IX86_BUILTIN_PSLLDQI128:
13958 case IX86_BUILTIN_PSRLDQI128:
13959 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13960 : CODE_FOR_sse2_lshrti3);
13961 arg0 = TREE_VALUE (arglist);
13962 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13963 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13964 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13965 tmode = insn_data[icode].operand[0].mode;
13966 mode1 = insn_data[icode].operand[1].mode;
13967 mode2 = insn_data[icode].operand[2].mode;
13969 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13971 op0 = copy_to_reg (op0);
13972 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13974 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13976 error ("shift must be an immediate");
13979 target = gen_reg_rtx (V2DImode);
13980 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13986 case IX86_BUILTIN_FEMMS:
13987 emit_insn (gen_femms ());
13990 case IX86_BUILTIN_PAVGUSB:
13991 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13993 case IX86_BUILTIN_PF2ID:
13994 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13996 case IX86_BUILTIN_PFACC:
13997 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13999 case IX86_BUILTIN_PFADD:
14000 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14002 case IX86_BUILTIN_PFCMPEQ:
14003 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14005 case IX86_BUILTIN_PFCMPGE:
14006 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14008 case IX86_BUILTIN_PFCMPGT:
14009 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14011 case IX86_BUILTIN_PFMAX:
14012 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14014 case IX86_BUILTIN_PFMIN:
14015 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14017 case IX86_BUILTIN_PFMUL:
14018 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14020 case IX86_BUILTIN_PFRCP:
14021 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14023 case IX86_BUILTIN_PFRCPIT1:
14024 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14026 case IX86_BUILTIN_PFRCPIT2:
14027 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14029 case IX86_BUILTIN_PFRSQIT1:
14030 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14032 case IX86_BUILTIN_PFRSQRT:
14033 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14035 case IX86_BUILTIN_PFSUB:
14036 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14038 case IX86_BUILTIN_PFSUBR:
14039 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14041 case IX86_BUILTIN_PI2FD:
14042 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14044 case IX86_BUILTIN_PMULHRW:
14045 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14047 case IX86_BUILTIN_PF2IW:
14048 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14050 case IX86_BUILTIN_PFNACC:
14051 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14053 case IX86_BUILTIN_PFPNACC:
14054 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14056 case IX86_BUILTIN_PI2FW:
14057 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14059 case IX86_BUILTIN_PSWAPDSI:
14060 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14062 case IX86_BUILTIN_PSWAPDSF:
14063 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14065 case IX86_BUILTIN_SSE_ZERO:
14066 target = gen_reg_rtx (V4SFmode);
14067 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14070 case IX86_BUILTIN_MMX_ZERO:
14071 target = gen_reg_rtx (DImode);
14072 emit_insn (gen_mmx_clrdi (target));
14075 case IX86_BUILTIN_CLRTI:
14076 target = gen_reg_rtx (V2DImode);
14077 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14081 case IX86_BUILTIN_SQRTSD:
14082 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14083 case IX86_BUILTIN_LOADAPD:
14084 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14085 case IX86_BUILTIN_LOADUPD:
14086 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14088 case IX86_BUILTIN_STOREAPD:
14089 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14090 case IX86_BUILTIN_STOREUPD:
14091 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14093 case IX86_BUILTIN_LOADSD:
14094 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14096 case IX86_BUILTIN_STORESD:
14097 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14099 case IX86_BUILTIN_SETPD1:
14100 target = assign_386_stack_local (DFmode, 0);
14101 arg0 = TREE_VALUE (arglist);
14102 emit_move_insn (adjust_address (target, DFmode, 0),
14103 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14104 op0 = gen_reg_rtx (V2DFmode);
14105 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14106 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14109 case IX86_BUILTIN_SETPD:
14110 target = assign_386_stack_local (V2DFmode, 0);
14111 arg0 = TREE_VALUE (arglist);
14112 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14113 emit_move_insn (adjust_address (target, DFmode, 0),
14114 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14115 emit_move_insn (adjust_address (target, DFmode, 8),
14116 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14117 op0 = gen_reg_rtx (V2DFmode);
14118 emit_insn (gen_sse2_movapd (op0, target));
14121 case IX86_BUILTIN_LOADRPD:
14122 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14123 gen_reg_rtx (V2DFmode), 1);
14124 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14127 case IX86_BUILTIN_LOADPD1:
14128 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14129 gen_reg_rtx (V2DFmode), 1);
14130 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14133 case IX86_BUILTIN_STOREPD1:
14134 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14135 case IX86_BUILTIN_STORERPD:
14136 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14138 case IX86_BUILTIN_CLRPD:
14139 target = gen_reg_rtx (V2DFmode);
14140 emit_insn (gen_sse_clrv2df (target));
14143 case IX86_BUILTIN_MFENCE:
14144 emit_insn (gen_sse2_mfence ());
14146 case IX86_BUILTIN_LFENCE:
14147 emit_insn (gen_sse2_lfence ());
14150 case IX86_BUILTIN_CLFLUSH:
14151 arg0 = TREE_VALUE (arglist);
14152 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14153 icode = CODE_FOR_sse2_clflush;
14154 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14155 op0 = copy_to_mode_reg (Pmode, op0);
14157 emit_insn (gen_sse2_clflush (op0));
14160 case IX86_BUILTIN_MOVNTPD:
14161 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14162 case IX86_BUILTIN_MOVNTDQ:
14163 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14164 case IX86_BUILTIN_MOVNTI:
14165 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14167 case IX86_BUILTIN_LOADDQA:
14168 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14169 case IX86_BUILTIN_LOADDQU:
14170 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14171 case IX86_BUILTIN_LOADD:
14172 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14174 case IX86_BUILTIN_STOREDQA:
14175 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14176 case IX86_BUILTIN_STOREDQU:
14177 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14178 case IX86_BUILTIN_STORED:
14179 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14185 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14186 if (d->code == fcode)
14188 /* Compares are treated specially. */
14189 if (d->icode == CODE_FOR_maskcmpv4sf3
14190 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14191 || d->icode == CODE_FOR_maskncmpv4sf3
14192 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14193 || d->icode == CODE_FOR_maskcmpv2df3
14194 || d->icode == CODE_FOR_vmmaskcmpv2df3
14195 || d->icode == CODE_FOR_maskncmpv2df3
14196 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14197 return ix86_expand_sse_compare (d, arglist, target);
14199 return ix86_expand_binop_builtin (d->icode, arglist, target);
14202 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14203 if (d->code == fcode)
14204 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14206 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14207 if (d->code == fcode)
14208 return ix86_expand_sse_comi (d, arglist, target);
14210 /* @@@ Should really do something sensible here. */
14214 /* Store OPERAND to the memory after reload is completed. This means
14215 that we can't easily use assign_stack_local. */
14217 ix86_force_to_memory (mode, operand)
14218 enum machine_mode mode;
14222 if (!reload_completed)
14224 if (TARGET_64BIT && TARGET_RED_ZONE)
14226 result = gen_rtx_MEM (mode,
14227 gen_rtx_PLUS (Pmode,
14229 GEN_INT (-RED_ZONE_SIZE)));
14230 emit_move_insn (result, operand);
14232 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14238 operand = gen_lowpart (DImode, operand);
14242 gen_rtx_SET (VOIDmode,
14243 gen_rtx_MEM (DImode,
14244 gen_rtx_PRE_DEC (DImode,
14245 stack_pointer_rtx)),
14251 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14260 split_di (&operand, 1, operands, operands + 1);
14262 gen_rtx_SET (VOIDmode,
14263 gen_rtx_MEM (SImode,
14264 gen_rtx_PRE_DEC (Pmode,
14265 stack_pointer_rtx)),
14268 gen_rtx_SET (VOIDmode,
14269 gen_rtx_MEM (SImode,
14270 gen_rtx_PRE_DEC (Pmode,
14271 stack_pointer_rtx)),
14276 /* It is better to store HImodes as SImodes. */
14277 if (!TARGET_PARTIAL_REG_STALL)
14278 operand = gen_lowpart (SImode, operand);
14282 gen_rtx_SET (VOIDmode,
14283 gen_rtx_MEM (GET_MODE (operand),
14284 gen_rtx_PRE_DEC (SImode,
14285 stack_pointer_rtx)),
14291 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14296 /* Free operand from the memory. */
14298 ix86_free_from_memory (mode)
14299 enum machine_mode mode;
14301 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14305 if (mode == DImode || TARGET_64BIT)
14307 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14311 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14312 to pop or add instruction if registers are available. */
14313 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14314 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14319 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14320 QImode must go into class Q_REGS.
14321 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14322 movdf to do mem-to-mem moves through integer regs. */
14324 ix86_preferred_reload_class (x, class)
14326 enum reg_class class;
14328 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14330 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14332 /* SSE can't load any constant directly yet. */
14333 if (SSE_CLASS_P (class))
14335 /* Floats can load 0 and 1. */
14336 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14338 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14339 if (MAYBE_SSE_CLASS_P (class))
14340 return (reg_class_subset_p (class, GENERAL_REGS)
14341 ? GENERAL_REGS : FLOAT_REGS);
14345 /* General regs can load everything. */
14346 if (reg_class_subset_p (class, GENERAL_REGS))
14347 return GENERAL_REGS;
14348 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14349 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14352 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14354 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14359 /* If we are copying between general and FP registers, we need a memory
14360 location. The same is true for SSE and MMX registers.
14362 The macro can't work reliably when one of the CLASSES is class containing
14363 registers from multiple units (SSE, MMX, integer). We avoid this by never
14364 combining those units in single alternative in the machine description.
14365 Ensure that this constraint holds to avoid unexpected surprises.
14367 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14368 enforce these sanity checks. */
14370 ix86_secondary_memory_needed (class1, class2, mode, strict)
14371 enum reg_class class1, class2;
14372 enum machine_mode mode;
14375 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14376 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14377 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14378 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14379 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14380 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14387 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14388 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14389 && (mode) != SImode)
14390 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14391 && (mode) != SImode));
14393 /* Return the cost of moving data from a register in class CLASS1 to
14394 one in class CLASS2.
14396 It is not required that the cost always equal 2 when FROM is the same as TO;
14397 on some machines it is expensive to move between registers if they are not
14398 general registers. */
14400 ix86_register_move_cost (mode, class1, class2)
14401 enum machine_mode mode;
14402 enum reg_class class1, class2;
14404 /* In case we require secondary memory, compute cost of the store followed
14405 by load. In order to avoid bad register allocation choices, we need
14406 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14408 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14412 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14413 MEMORY_MOVE_COST (mode, class1, 1));
14414 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14415 MEMORY_MOVE_COST (mode, class2, 1));
14417 /* In case of copying from general_purpose_register we may emit multiple
14418 stores followed by single load causing memory size mismatch stall.
14419 Count this as arbitrarily high cost of 20. */
14420 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14423 /* In the case of FP/MMX moves, the registers actually overlap, and we
14424 have to switch modes in order to treat them differently. */
14425 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14426 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14432 /* Moves between SSE/MMX and integer unit are expensive. */
14433 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14434 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14435 return ix86_cost->mmxsse_to_integer;
14436 if (MAYBE_FLOAT_CLASS_P (class1))
14437 return ix86_cost->fp_move;
14438 if (MAYBE_SSE_CLASS_P (class1))
14439 return ix86_cost->sse_move;
14440 if (MAYBE_MMX_CLASS_P (class1))
14441 return ix86_cost->mmx_move;
14445 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14447 ix86_hard_regno_mode_ok (regno, mode)
14449 enum machine_mode mode;
14451 /* Flags and only flags can only hold CCmode values. */
14452 if (CC_REGNO_P (regno))
14453 return GET_MODE_CLASS (mode) == MODE_CC;
14454 if (GET_MODE_CLASS (mode) == MODE_CC
14455 || GET_MODE_CLASS (mode) == MODE_RANDOM
14456 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14458 if (FP_REGNO_P (regno))
14459 return VALID_FP_MODE_P (mode);
14460 if (SSE_REGNO_P (regno))
14461 return VALID_SSE_REG_MODE (mode);
14462 if (MMX_REGNO_P (regno))
14463 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14464 /* We handle both integer and floats in the general purpose registers.
14465 In future we should be able to handle vector modes as well. */
14466 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14468 /* Take care for QImode values - they can be in non-QI regs, but then
14469 they do cause partial register stalls. */
14470 if (regno < 4 || mode != QImode || TARGET_64BIT)
14472 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14475 /* Return the cost of moving data of mode M between a
14476 register and memory. A value of 2 is the default; this cost is
14477 relative to those in `REGISTER_MOVE_COST'.
14479 If moving between registers and memory is more expensive than
14480 between two registers, you should define this macro to express the
14483 Model also increased moving costs of QImode registers in non
14487 ix86_memory_move_cost (mode, class, in)
14488 enum machine_mode mode;
14489 enum reg_class class;
14492 if (FLOAT_CLASS_P (class))
14510 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14512 if (SSE_CLASS_P (class))
14515 switch (GET_MODE_SIZE (mode))
14529 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14531 if (MMX_CLASS_P (class))
14534 switch (GET_MODE_SIZE (mode))
14545 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14547 switch (GET_MODE_SIZE (mode))
14551 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14552 : ix86_cost->movzbl_load);
14554 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14555 : ix86_cost->int_store[0] + 4);
14558 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14560 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14561 if (mode == TFmode)
14563 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14564 * ((int) GET_MODE_SIZE (mode)
14565 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14569 /* Compute a (partial) cost for rtx X. Return true if the complete
14570 cost has been computed, and false if subexpressions should be
14571 scanned. In either case, *TOTAL contains the cost result. */
14574 ix86_rtx_costs (x, code, outer_code, total)
14576 int code, outer_code;
14579 enum machine_mode mode = GET_MODE (x);
14587 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14589 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14591 else if (flag_pic && SYMBOLIC_CONST (x))
14598 if (mode == VOIDmode)
14601 switch (standard_80387_constant_p (x))
14610 /* Start with (MEM (SYMBOL_REF)), since that's where
14611 it'll probably end up. Add a penalty for size. */
14612 *total = (COSTS_N_INSNS (1)
14614 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14620 /* The zero extensions is often completely free on x86_64, so make
14621 it as cheap as possible. */
14622 if (TARGET_64BIT && mode == DImode
14623 && GET_MODE (XEXP (x, 0)) == SImode)
14625 else if (TARGET_ZERO_EXTEND_WITH_AND)
14626 *total = COSTS_N_INSNS (ix86_cost->add);
14628 *total = COSTS_N_INSNS (ix86_cost->movzx);
14632 *total = COSTS_N_INSNS (ix86_cost->movsx);
14636 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14637 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14639 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14642 *total = COSTS_N_INSNS (ix86_cost->add);
14645 if ((value == 2 || value == 3)
14646 && !TARGET_DECOMPOSE_LEA
14647 && ix86_cost->lea <= ix86_cost->shift_const)
14649 *total = COSTS_N_INSNS (ix86_cost->lea);
14659 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14661 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14663 if (INTVAL (XEXP (x, 1)) > 32)
14664 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14666 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14670 if (GET_CODE (XEXP (x, 1)) == AND)
14671 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14673 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14678 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14679 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14681 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14686 if (FLOAT_MODE_P (mode))
14687 *total = COSTS_N_INSNS (ix86_cost->fmul);
14688 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14690 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14693 for (nbits = 0; value != 0; value >>= 1)
14696 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14697 + nbits * ix86_cost->mult_bit);
14701 /* This is arbitrary */
14702 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14703 + 7 * ix86_cost->mult_bit);
14711 if (FLOAT_MODE_P (mode))
14712 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14714 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14718 if (FLOAT_MODE_P (mode))
14719 *total = COSTS_N_INSNS (ix86_cost->fadd);
14720 else if (!TARGET_DECOMPOSE_LEA
14721 && GET_MODE_CLASS (mode) == MODE_INT
14722 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14724 if (GET_CODE (XEXP (x, 0)) == PLUS
14725 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14726 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14727 && CONSTANT_P (XEXP (x, 1)))
14729 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14730 if (val == 2 || val == 4 || val == 8)
14732 *total = COSTS_N_INSNS (ix86_cost->lea);
14733 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14734 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14736 *total += rtx_cost (XEXP (x, 1), outer_code);
14740 else if (GET_CODE (XEXP (x, 0)) == MULT
14741 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14743 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14744 if (val == 2 || val == 4 || val == 8)
14746 *total = COSTS_N_INSNS (ix86_cost->lea);
14747 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14748 *total += rtx_cost (XEXP (x, 1), outer_code);
14752 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14754 *total = COSTS_N_INSNS (ix86_cost->lea);
14755 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14756 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14757 *total += rtx_cost (XEXP (x, 1), outer_code);
14764 if (FLOAT_MODE_P (mode))
14766 *total = COSTS_N_INSNS (ix86_cost->fadd);
14774 if (!TARGET_64BIT && mode == DImode)
14776 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14777 + (rtx_cost (XEXP (x, 0), outer_code)
14778 << (GET_MODE (XEXP (x, 0)) != DImode))
14779 + (rtx_cost (XEXP (x, 1), outer_code)
14780 << (GET_MODE (XEXP (x, 1)) != DImode)));
14786 if (FLOAT_MODE_P (mode))
14788 *total = COSTS_N_INSNS (ix86_cost->fchs);
14794 if (!TARGET_64BIT && mode == DImode)
14795 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14797 *total = COSTS_N_INSNS (ix86_cost->add);
14801 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14806 if (FLOAT_MODE_P (mode))
14807 *total = COSTS_N_INSNS (ix86_cost->fabs);
14811 if (FLOAT_MODE_P (mode))
14812 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14820 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14822 ix86_svr3_asm_out_constructor (symbol, priority)
14824 int priority ATTRIBUTE_UNUSED;
14827 fputs ("\tpushl $", asm_out_file);
14828 assemble_name (asm_out_file, XSTR (symbol, 0));
14829 fputc ('\n', asm_out_file);
14835 static int current_machopic_label_num;
14837 /* Given a symbol name and its associated stub, write out the
14838 definition of the stub. */
14841 machopic_output_stub (file, symb, stub)
14843 const char *symb, *stub;
14845 unsigned int length;
14846 char *binder_name, *symbol_name, lazy_ptr_name[32];
14847 int label = ++current_machopic_label_num;
14849 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14850 symb = (*targetm.strip_name_encoding) (symb);
14852 length = strlen (stub);
14853 binder_name = alloca (length + 32);
14854 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14856 length = strlen (symb);
14857 symbol_name = alloca (length + 32);
14858 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14860 sprintf (lazy_ptr_name, "L%d$lz", label);
14863 machopic_picsymbol_stub_section ();
14865 machopic_symbol_stub_section ();
14867 fprintf (file, "%s:\n", stub);
14868 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14872 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14873 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14874 fprintf (file, "\tjmp %%edx\n");
14877 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14879 fprintf (file, "%s:\n", binder_name);
14883 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14884 fprintf (file, "\tpushl %%eax\n");
14887 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14889 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14891 machopic_lazy_symbol_ptr_section ();
14892 fprintf (file, "%s:\n", lazy_ptr_name);
14893 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14894 fprintf (file, "\t.long %s\n", binder_name);
14896 #endif /* TARGET_MACHO */
14898 /* Order the registers for register allocator. */
14901 x86_order_regs_for_local_alloc ()
14906 /* First allocate the local general purpose registers. */
14907 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14908 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14909 reg_alloc_order [pos++] = i;
14911 /* Global general purpose registers. */
14912 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14913 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14914 reg_alloc_order [pos++] = i;
14916 /* x87 registers come first in case we are doing FP math
14918 if (!TARGET_SSE_MATH)
14919 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14920 reg_alloc_order [pos++] = i;
14922 /* SSE registers. */
14923 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14924 reg_alloc_order [pos++] = i;
14925 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14926 reg_alloc_order [pos++] = i;
14928 /* x87 registers. */
14929 if (TARGET_SSE_MATH)
14930 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14931 reg_alloc_order [pos++] = i;
14933 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14934 reg_alloc_order [pos++] = i;
14936 /* Initialize the rest of array as we do not allocate some registers
14938 while (pos < FIRST_PSEUDO_REGISTER)
14939 reg_alloc_order [pos++] = 0;
14942 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14943 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14946 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14947 struct attribute_spec.handler. */
14949 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
14952 tree args ATTRIBUTE_UNUSED;
14953 int flags ATTRIBUTE_UNUSED;
14954 bool *no_add_attrs;
14957 if (DECL_P (*node))
14959 if (TREE_CODE (*node) == TYPE_DECL)
14960 type = &TREE_TYPE (*node);
14965 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14966 || TREE_CODE (*type) == UNION_TYPE)))
14968 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14969 *no_add_attrs = true;
14972 else if ((is_attribute_p ("ms_struct", name)
14973 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14974 || ((is_attribute_p ("gcc_struct", name)
14975 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14977 warning ("`%s' incompatible attribute ignored",
14978 IDENTIFIER_POINTER (name));
14979 *no_add_attrs = true;
14986 ix86_ms_bitfield_layout_p (record_type)
14989 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14990 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14991 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14994 /* Returns an expression indicating where the this parameter is
14995 located on entry to the FUNCTION. */
14998 x86_this_parameter (function)
15001 tree type = TREE_TYPE (function);
15005 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15006 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15009 if (ix86_fntype_regparm (type) > 0)
15013 parm = TYPE_ARG_TYPES (type);
15014 /* Figure out whether or not the function has a variable number of
15016 for (; parm; parm = TREE_CHAIN (parm))
15017 if (TREE_VALUE (parm) == void_type_node)
15019 /* If not, the this parameter is in %eax. */
15021 return gen_rtx_REG (SImode, 0);
15024 if (aggregate_value_p (TREE_TYPE (type)))
15025 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15027 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15030 /* Determine whether x86_output_mi_thunk can succeed. */
15033 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15034 tree thunk ATTRIBUTE_UNUSED;
15035 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15036 HOST_WIDE_INT vcall_offset;
15039 /* 64-bit can handle anything. */
15043 /* For 32-bit, everything's fine if we have one free register. */
15044 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15047 /* Need a free register for vcall_offset. */
15051 /* Need a free register for GOT references. */
15052 if (flag_pic && !(*targetm.binds_local_p) (function))
15055 /* Otherwise ok. */
15059 /* Output the assembler code for a thunk function. THUNK_DECL is the
15060 declaration for the thunk function itself, FUNCTION is the decl for
15061 the target function. DELTA is an immediate constant offset to be
15062 added to THIS. If VCALL_OFFSET is nonzero, the word at
15063 *(*this + vcall_offset) should be added to THIS. */
15066 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15067 FILE *file ATTRIBUTE_UNUSED;
15068 tree thunk ATTRIBUTE_UNUSED;
15069 HOST_WIDE_INT delta;
15070 HOST_WIDE_INT vcall_offset;
15074 rtx this = x86_this_parameter (function);
15077 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15078 pull it in now and let DELTA benefit. */
15081 else if (vcall_offset)
15083 /* Put the this parameter into %eax. */
15085 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15086 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15089 this_reg = NULL_RTX;
15091 /* Adjust the this parameter by a fixed constant. */
15094 xops[0] = GEN_INT (delta);
15095 xops[1] = this_reg ? this_reg : this;
15098 if (!x86_64_general_operand (xops[0], DImode))
15100 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15102 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15106 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15109 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15112 /* Adjust the this parameter by a value stored in the vtable. */
15116 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15118 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15120 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15123 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15125 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15127 /* Adjust the this parameter. */
15128 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15129 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15131 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15132 xops[0] = GEN_INT (vcall_offset);
15134 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15135 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15137 xops[1] = this_reg;
15139 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15141 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15144 /* If necessary, drop THIS back to its stack slot. */
15145 if (this_reg && this_reg != this)
15147 xops[0] = this_reg;
15149 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15152 xops[0] = DECL_RTL (function);
15155 if (!flag_pic || (*targetm.binds_local_p) (function))
15156 output_asm_insn ("jmp\t%P0", xops);
15159 tmp = XEXP (xops[0], 0);
15160 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15161 tmp = gen_rtx_CONST (Pmode, tmp);
15162 tmp = gen_rtx_MEM (QImode, tmp);
15164 output_asm_insn ("jmp\t%A0", xops);
15169 if (!flag_pic || (*targetm.binds_local_p) (function))
15170 output_asm_insn ("jmp\t%P0", xops);
15175 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15176 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15177 tmp = gen_rtx_MEM (QImode, tmp);
15179 output_asm_insn ("jmp\t%0", xops);
15182 #endif /* TARGET_MACHO */
15184 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15185 output_set_got (tmp);
15188 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15189 output_asm_insn ("jmp\t{*}%1", xops);
15195 x86_field_alignment (field, computed)
15199 enum machine_mode mode;
15200 tree type = TREE_TYPE (field);
15202 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15204 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15205 ? get_inner_array_type (type) : type);
15206 if (mode == DFmode || mode == DCmode
15207 || GET_MODE_CLASS (mode) == MODE_INT
15208 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15209 return MIN (32, computed);
15213 /* Output assembler code to FILE to increment profiler label # LABELNO
15214 for profiling a function entry. */
15216 x86_function_profiler (file, labelno)
15218 int labelno ATTRIBUTE_UNUSED;
15223 #ifndef NO_PROFILE_COUNTERS
15224 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15226 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15230 #ifndef NO_PROFILE_COUNTERS
15231 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15233 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15237 #ifndef NO_PROFILE_COUNTERS
15238 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15239 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15241 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15245 #ifndef NO_PROFILE_COUNTERS
15246 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15247 PROFILE_COUNT_REGISTER);
15249 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15253 /* Implement machine specific optimizations.
15254 At the moment we implement single transformation: AMD Athlon works faster
15255 when RET is not destination of conditional jump or directly preceded
15256 by other jump instruction. We avoid the penalty by inserting NOP just
15257 before the RET instructions in such cases. */
15259 x86_machine_dependent_reorg (first)
15260 rtx first ATTRIBUTE_UNUSED;
15264 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15266 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15268 basic_block bb = e->src;
15271 bool insert = false;
15273 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
15275 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15276 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15278 if (prev && GET_CODE (prev) == CODE_LABEL)
15281 for (e = bb->pred; e; e = e->pred_next)
15282 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15283 && !(e->flags & EDGE_FALLTHRU))
15288 prev = prev_active_insn (ret);
15289 if (prev && GET_CODE (prev) == JUMP_INSN
15290 && any_condjump_p (prev))
15292 /* Empty functions get branch misspredict even when the jump destination
15293 is not visible to us. */
15294 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15298 emit_insn_before (gen_nop (), ret);
15302 /* Return nonzero when QImode register that must be represented via REX prefix
15305 x86_extended_QIreg_mentioned_p (insn)
15309 extract_insn_cached (insn);
15310 for (i = 0; i < recog_data.n_operands; i++)
15311 if (REG_P (recog_data.operand[i])
15312 && REGNO (recog_data.operand[i]) >= 4)
15317 /* Return nonzero when P points to register encoded via REX prefix.
15318 Called via for_each_rtx. */
15320 extended_reg_mentioned_1 (p, data)
15322 void *data ATTRIBUTE_UNUSED;
15324 unsigned int regno;
15327 regno = REGNO (*p);
15328 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15331 /* Return true when INSN mentions register that must be encoded using REX
15334 x86_extended_reg_mentioned_p (insn)
15337 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15340 #include "gt-i386.h"