1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static rtx get_thread_pointer (int);
797 static rtx legitimize_tls_address (rtx, enum tls_model, int);
798 static void get_pc_thunk_name (char [32], unsigned int);
799 static rtx gen_push (rtx);
800 static int memory_address_length (rtx addr);
801 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
802 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
803 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
804 static void ix86_dump_ppro_packet (FILE *);
805 static void ix86_reorder_insn (rtx *, rtx *);
806 static struct machine_function * ix86_init_machine_status (void);
807 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
808 static int ix86_nsaved_regs (void);
809 static void ix86_emit_save_regs (void);
810 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
811 static void ix86_emit_restore_regs_using_mov (rtx, int, int);
812 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
813 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
814 static void ix86_sched_reorder_ppro (rtx *, rtx *);
815 static HOST_WIDE_INT ix86_GOT_alias_set (void);
816 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
817 static rtx ix86_expand_aligntest (rtx, int);
818 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
819 static int ix86_issue_rate (void);
820 static int ix86_adjust_cost (rtx, rtx, rtx, int);
821 static void ix86_sched_init (FILE *, int, int);
822 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
823 static int ix86_variable_issue (FILE *, int, rtx, int);
824 static int ia32_use_dfa_pipeline_interface (void);
825 static int ia32_multipass_dfa_lookahead (void);
826 static void ix86_init_mmx_sse_builtins (void);
827 static rtx x86_this_parameter (tree);
828 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree);
830 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
831 static void x86_file_start (void);
832 static void ix86_reorg (void);
833 bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
837 rtx base, index, disp;
839 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
842 static int ix86_decompose_address (rtx, struct ix86_address *);
843 static int ix86_address_cost (rtx);
844 static bool ix86_cannot_force_const_mem (rtx);
845 static rtx ix86_delegitimize_address (rtx);
847 struct builtin_description;
848 static rtx ix86_expand_sse_comi (const struct builtin_description *,
850 static rtx ix86_expand_sse_compare (const struct builtin_description *,
852 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
853 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
854 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
855 static rtx ix86_expand_store_builtin (enum insn_code, tree);
856 static rtx safe_vector_operand (rtx, enum machine_mode);
857 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
858 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
859 enum rtx_code *, enum rtx_code *);
860 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
861 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
862 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
863 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
864 static int ix86_fp_comparison_cost (enum rtx_code code);
865 static unsigned int ix86_select_alt_pic_regnum (void);
866 static int ix86_save_reg (unsigned int, int);
867 static void ix86_compute_frame_layout (struct ix86_frame *);
868 static int ix86_comp_type_attributes (tree, tree);
869 static int ix86_fntype_regparm (tree);
870 const struct attribute_spec ix86_attribute_table[];
871 static bool ix86_function_ok_for_sibcall (tree, tree);
872 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
873 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
874 static int ix86_value_regno (enum machine_mode);
875 static bool contains_128bit_aligned_vector_p (tree);
876 static bool ix86_ms_bitfield_layout_p (tree);
877 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
878 static int extended_reg_mentioned_1 (rtx *, void *);
879 static bool ix86_rtx_costs (rtx, int, int, int *);
880 static int min_insn_size (rtx);
881 static void k8_avoid_jump_misspredicts (void);
883 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
884 static void ix86_svr3_asm_out_constructor (rtx, int);
887 /* Register class used for passing given 64bit part of the argument.
888 These represent classes as documented by the PS ABI, with the exception
889 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
890 use SF or DFmode move instead of DImode to avoid reformatting penalties.
892 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
893 whenever possible (upper half does contain padding).
895 enum x86_64_reg_class
898 X86_64_INTEGER_CLASS,
899 X86_64_INTEGERSI_CLASS,
908 static const char * const x86_64_reg_class_name[] =
909 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
911 #define MAX_CLASSES 4
912 static int classify_argument (enum machine_mode, tree,
913 enum x86_64_reg_class [MAX_CLASSES], int);
914 static int examine_argument (enum machine_mode, tree, int, int *, int *);
915 static rtx construct_container (enum machine_mode, tree, int, int, int,
917 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
918 enum x86_64_reg_class);
920 /* Table of constants used by fldpi, fldln2, etc... */
921 static REAL_VALUE_TYPE ext_80387_constants_table [5];
922 static bool ext_80387_constants_init = 0;
923 static void init_ext_80387_constants (void);
925 /* Initialize the GCC target structure. */
926 #undef TARGET_ATTRIBUTE_TABLE
927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
928 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
929 # undef TARGET_MERGE_DECL_ATTRIBUTES
930 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
933 #undef TARGET_COMP_TYPE_ATTRIBUTES
934 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
936 #undef TARGET_INIT_BUILTINS
937 #define TARGET_INIT_BUILTINS ix86_init_builtins
939 #undef TARGET_EXPAND_BUILTIN
940 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
942 #undef TARGET_ASM_FUNCTION_EPILOGUE
943 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
945 #undef TARGET_ASM_OPEN_PAREN
946 #define TARGET_ASM_OPEN_PAREN ""
947 #undef TARGET_ASM_CLOSE_PAREN
948 #define TARGET_ASM_CLOSE_PAREN ""
950 #undef TARGET_ASM_ALIGNED_HI_OP
951 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952 #undef TARGET_ASM_ALIGNED_SI_OP
953 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
955 #undef TARGET_ASM_ALIGNED_DI_OP
956 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
959 #undef TARGET_ASM_UNALIGNED_HI_OP
960 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961 #undef TARGET_ASM_UNALIGNED_SI_OP
962 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963 #undef TARGET_ASM_UNALIGNED_DI_OP
964 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
966 #undef TARGET_SCHED_ADJUST_COST
967 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968 #undef TARGET_SCHED_ISSUE_RATE
969 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970 #undef TARGET_SCHED_VARIABLE_ISSUE
971 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972 #undef TARGET_SCHED_INIT
973 #define TARGET_SCHED_INIT ix86_sched_init
974 #undef TARGET_SCHED_REORDER
975 #define TARGET_SCHED_REORDER ix86_sched_reorder
976 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
977 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
983 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
984 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
987 #undef TARGET_HAVE_TLS
988 #define TARGET_HAVE_TLS true
990 #undef TARGET_CANNOT_FORCE_CONST_MEM
991 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
993 #undef TARGET_DELEGITIMIZE_ADDRESS
994 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
996 #undef TARGET_MS_BITFIELD_LAYOUT_P
997 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
999 #undef TARGET_ASM_OUTPUT_MI_THUNK
1000 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1001 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1002 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1004 #undef TARGET_ASM_FILE_START
1005 #define TARGET_ASM_FILE_START x86_file_start
1007 #undef TARGET_RTX_COSTS
1008 #define TARGET_RTX_COSTS ix86_rtx_costs
1009 #undef TARGET_ADDRESS_COST
1010 #define TARGET_ADDRESS_COST ix86_address_cost
1012 #undef TARGET_MACHINE_DEPENDENT_REORG
1013 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1015 struct gcc_target targetm = TARGET_INITIALIZER;
1017 /* The svr4 ABI for the i386 says that records and unions are returned
1019 #ifndef DEFAULT_PCC_STRUCT_RETURN
1020 #define DEFAULT_PCC_STRUCT_RETURN 1
1023 /* Sometimes certain combinations of command options do not make
1024 sense on a particular target machine. You can define a macro
1025 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1026 defined, is executed once just after all the command options have
1029 Don't use this macro to turn on various extra optimizations for
1030 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1033 override_options (void)
1036 /* Comes from final.c -- no real reason to change it. */
1037 #define MAX_CODE_ALIGN 16
1041 const struct processor_costs *cost; /* Processor costs */
1042 const int target_enable; /* Target flags to enable. */
1043 const int target_disable; /* Target flags to disable. */
1044 const int align_loop; /* Default alignments. */
1045 const int align_loop_max_skip;
1046 const int align_jump;
1047 const int align_jump_max_skip;
1048 const int align_func;
1050 const processor_target_table[PROCESSOR_max] =
1052 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1053 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1054 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1055 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1056 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1057 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1058 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1059 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1062 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1065 const char *const name; /* processor name or nickname. */
1066 const enum processor_type processor;
1067 const enum pta_flags
1072 PTA_PREFETCH_SSE = 8,
1078 const processor_alias_table[] =
1080 {"i386", PROCESSOR_I386, 0},
1081 {"i486", PROCESSOR_I486, 0},
1082 {"i586", PROCESSOR_PENTIUM, 0},
1083 {"pentium", PROCESSOR_PENTIUM, 0},
1084 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1085 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1086 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1087 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1088 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1089 {"i686", PROCESSOR_PENTIUMPRO, 0},
1090 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1091 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1092 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1093 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1094 PTA_MMX | PTA_PREFETCH_SSE},
1095 {"k6", PROCESSOR_K6, PTA_MMX},
1096 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1097 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1098 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1100 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1101 | PTA_3DNOW | PTA_3DNOW_A},
1102 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1103 | PTA_3DNOW_A | PTA_SSE},
1104 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1105 | PTA_3DNOW_A | PTA_SSE},
1106 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1109 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1112 int const pta_size = ARRAY_SIZE (processor_alias_table);
1114 /* By default our XFmode is the 80-bit extended format. If we have
1115 use TFmode instead, it's also the 80-bit format, but with padding. */
1116 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1117 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1119 /* Set the default values for switches whose default depends on TARGET_64BIT
1120 in case they weren't overwritten by command line options. */
1123 if (flag_omit_frame_pointer == 2)
1124 flag_omit_frame_pointer = 1;
1125 if (flag_asynchronous_unwind_tables == 2)
1126 flag_asynchronous_unwind_tables = 1;
1127 if (flag_pcc_struct_return == 2)
1128 flag_pcc_struct_return = 0;
1132 if (flag_omit_frame_pointer == 2)
1133 flag_omit_frame_pointer = 0;
1134 if (flag_asynchronous_unwind_tables == 2)
1135 flag_asynchronous_unwind_tables = 0;
1136 if (flag_pcc_struct_return == 2)
1137 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1140 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1141 SUBTARGET_OVERRIDE_OPTIONS;
1144 if (!ix86_tune_string && ix86_arch_string)
1145 ix86_tune_string = ix86_arch_string;
1146 if (!ix86_tune_string)
1147 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1148 if (!ix86_arch_string)
1149 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1151 if (ix86_cmodel_string != 0)
1153 if (!strcmp (ix86_cmodel_string, "small"))
1154 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1156 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1157 else if (!strcmp (ix86_cmodel_string, "32"))
1158 ix86_cmodel = CM_32;
1159 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1160 ix86_cmodel = CM_KERNEL;
1161 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1162 ix86_cmodel = CM_MEDIUM;
1163 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1164 ix86_cmodel = CM_LARGE;
1166 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1170 ix86_cmodel = CM_32;
1172 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1174 if (ix86_asm_string != 0)
1176 if (!strcmp (ix86_asm_string, "intel"))
1177 ix86_asm_dialect = ASM_INTEL;
1178 else if (!strcmp (ix86_asm_string, "att"))
1179 ix86_asm_dialect = ASM_ATT;
1181 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1183 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1184 error ("code model `%s' not supported in the %s bit mode",
1185 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1186 if (ix86_cmodel == CM_LARGE)
1187 sorry ("code model `large' not supported yet");
1188 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1189 sorry ("%i-bit mode not compiled in",
1190 (target_flags & MASK_64BIT) ? 64 : 32);
1192 for (i = 0; i < pta_size; i++)
1193 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1195 ix86_arch = processor_alias_table[i].processor;
1196 /* Default cpu tuning to the architecture. */
1197 ix86_tune = ix86_arch;
1198 if (processor_alias_table[i].flags & PTA_MMX
1199 && !(target_flags_explicit & MASK_MMX))
1200 target_flags |= MASK_MMX;
1201 if (processor_alias_table[i].flags & PTA_3DNOW
1202 && !(target_flags_explicit & MASK_3DNOW))
1203 target_flags |= MASK_3DNOW;
1204 if (processor_alias_table[i].flags & PTA_3DNOW_A
1205 && !(target_flags_explicit & MASK_3DNOW_A))
1206 target_flags |= MASK_3DNOW_A;
1207 if (processor_alias_table[i].flags & PTA_SSE
1208 && !(target_flags_explicit & MASK_SSE))
1209 target_flags |= MASK_SSE;
1210 if (processor_alias_table[i].flags & PTA_SSE2
1211 && !(target_flags_explicit & MASK_SSE2))
1212 target_flags |= MASK_SSE2;
1213 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1214 x86_prefetch_sse = true;
1215 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1216 error ("CPU you selected does not support x86-64 instruction set");
1221 error ("bad value (%s) for -march= switch", ix86_arch_string);
1223 for (i = 0; i < pta_size; i++)
1224 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1226 ix86_tune = processor_alias_table[i].processor;
1227 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1228 error ("CPU you selected does not support x86-64 instruction set");
1231 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1232 x86_prefetch_sse = true;
1234 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1237 ix86_cost = &size_cost;
1239 ix86_cost = processor_target_table[ix86_tune].cost;
1240 target_flags |= processor_target_table[ix86_tune].target_enable;
1241 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1243 /* Arrange to set up i386_stack_locals for all functions. */
1244 init_machine_status = ix86_init_machine_status;
1246 /* Validate -mregparm= value. */
1247 if (ix86_regparm_string)
1249 i = atoi (ix86_regparm_string);
1250 if (i < 0 || i > REGPARM_MAX)
1251 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1257 ix86_regparm = REGPARM_MAX;
1259 /* If the user has provided any of the -malign-* options,
1260 warn and use that value only if -falign-* is not set.
1261 Remove this code in GCC 3.2 or later. */
1262 if (ix86_align_loops_string)
1264 warning ("-malign-loops is obsolete, use -falign-loops");
1265 if (align_loops == 0)
1267 i = atoi (ix86_align_loops_string);
1268 if (i < 0 || i > MAX_CODE_ALIGN)
1269 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1271 align_loops = 1 << i;
1275 if (ix86_align_jumps_string)
1277 warning ("-malign-jumps is obsolete, use -falign-jumps");
1278 if (align_jumps == 0)
1280 i = atoi (ix86_align_jumps_string);
1281 if (i < 0 || i > MAX_CODE_ALIGN)
1282 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1284 align_jumps = 1 << i;
1288 if (ix86_align_funcs_string)
1290 warning ("-malign-functions is obsolete, use -falign-functions");
1291 if (align_functions == 0)
1293 i = atoi (ix86_align_funcs_string);
1294 if (i < 0 || i > MAX_CODE_ALIGN)
1295 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1297 align_functions = 1 << i;
1301 /* Default align_* from the processor table. */
1302 if (align_loops == 0)
1304 align_loops = processor_target_table[ix86_tune].align_loop;
1305 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1307 if (align_jumps == 0)
1309 align_jumps = processor_target_table[ix86_tune].align_jump;
1310 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1312 if (align_functions == 0)
1314 align_functions = processor_target_table[ix86_tune].align_func;
1317 /* Validate -mpreferred-stack-boundary= value, or provide default.
1318 The default of 128 bits is for Pentium III's SSE __m128, but we
1319 don't want additional code to keep the stack aligned when
1320 optimizing for code size. */
1321 ix86_preferred_stack_boundary = (optimize_size
1322 ? TARGET_64BIT ? 128 : 32
1324 if (ix86_preferred_stack_boundary_string)
1326 i = atoi (ix86_preferred_stack_boundary_string);
1327 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1328 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1329 TARGET_64BIT ? 4 : 2);
1331 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1334 /* Validate -mbranch-cost= value, or provide default. */
1335 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1336 if (ix86_branch_cost_string)
1338 i = atoi (ix86_branch_cost_string);
1340 error ("-mbranch-cost=%d is not between 0 and 5", i);
1342 ix86_branch_cost = i;
1345 if (ix86_tls_dialect_string)
1347 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1348 ix86_tls_dialect = TLS_DIALECT_GNU;
1349 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1350 ix86_tls_dialect = TLS_DIALECT_SUN;
1352 error ("bad value (%s) for -mtls-dialect= switch",
1353 ix86_tls_dialect_string);
1356 /* Keep nonleaf frame pointers. */
1357 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1358 flag_omit_frame_pointer = 1;
1360 /* If we're doing fast math, we don't care about comparison order
1361 wrt NaNs. This lets us use a shorter comparison sequence. */
1362 if (flag_unsafe_math_optimizations)
1363 target_flags &= ~MASK_IEEE_FP;
1365 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1366 since the insns won't need emulation. */
1367 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1368 target_flags &= ~MASK_NO_FANCY_MATH_387;
1370 /* Turn on SSE2 builtins for -mpni. */
1372 target_flags |= MASK_SSE2;
1374 /* Turn on SSE builtins for -msse2. */
1376 target_flags |= MASK_SSE;
1380 if (TARGET_ALIGN_DOUBLE)
1381 error ("-malign-double makes no sense in the 64bit mode");
1383 error ("-mrtd calling convention not supported in the 64bit mode");
1384 /* Enable by default the SSE and MMX builtins. */
1385 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1386 ix86_fpmath = FPMATH_SSE;
1390 ix86_fpmath = FPMATH_387;
1391 /* i386 ABI does not specify red zone. It still makes sense to use it
1392 when programmer takes care to stack from being destroyed. */
1393 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1394 target_flags |= MASK_NO_RED_ZONE;
1397 if (ix86_fpmath_string != 0)
1399 if (! strcmp (ix86_fpmath_string, "387"))
1400 ix86_fpmath = FPMATH_387;
1401 else if (! strcmp (ix86_fpmath_string, "sse"))
1405 warning ("SSE instruction set disabled, using 387 arithmetics");
1406 ix86_fpmath = FPMATH_387;
1409 ix86_fpmath = FPMATH_SSE;
1411 else if (! strcmp (ix86_fpmath_string, "387,sse")
1412 || ! strcmp (ix86_fpmath_string, "sse,387"))
1416 warning ("SSE instruction set disabled, using 387 arithmetics");
1417 ix86_fpmath = FPMATH_387;
1419 else if (!TARGET_80387)
1421 warning ("387 instruction set disabled, using SSE arithmetics");
1422 ix86_fpmath = FPMATH_SSE;
1425 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1428 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1431 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1435 target_flags |= MASK_MMX;
1436 x86_prefetch_sse = true;
1439 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1442 target_flags |= MASK_MMX;
1443 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1444 extensions it adds. */
1445 if (x86_3dnow_a & (1 << ix86_arch))
1446 target_flags |= MASK_3DNOW_A;
1448 if ((x86_accumulate_outgoing_args & TUNEMASK)
1449 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1451 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1453 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1456 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1457 p = strchr (internal_label_prefix, 'X');
1458 internal_label_prefix_len = p - internal_label_prefix;
1464 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1466 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1467 make the problem with not enough registers even worse. */
1468 #ifdef INSN_SCHEDULING
1470 flag_schedule_insns = 0;
1473 /* The default values of these switches depend on the TARGET_64BIT
1474 that is not known at this moment. Mark these values with 2 and
1475 let user the to override these. In case there is no command line option
1476 specifying them, we will set the defaults in override_options. */
1478 flag_omit_frame_pointer = 2;
1479 flag_pcc_struct_return = 2;
1480 flag_asynchronous_unwind_tables = 2;
1483 /* Table of valid machine attributes. */
1484 const struct attribute_spec ix86_attribute_table[] =
1486 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1487 /* Stdcall attribute says callee is responsible for popping arguments
1488 if they are not variable. */
1489 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Fastcall attribute says callee is responsible for popping arguments
1491 if they are not variable. */
1492 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1493 /* Cdecl attribute says the callee is a normal C declaration */
1494 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1495 /* Regparm attribute specifies how many integer arguments are to be
1496 passed in registers. */
1497 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1498 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1499 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1501 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1503 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1505 { NULL, 0, 0, false, false, false, NULL }
1508 /* Decide whether we can make a sibling call to a function. DECL is the
1509 declaration of the function being targeted by the call and EXP is the
1510 CALL_EXPR representing the call. */
1513 ix86_function_ok_for_sibcall (tree decl, tree exp)
1515 /* If we are generating position-independent code, we cannot sibcall
1516 optimize any indirect call, or a direct call to a global function,
1517 as the PLT requires %ebx be live. */
1518 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1521 /* If we are returning floats on the 80387 register stack, we cannot
1522 make a sibcall from a function that doesn't return a float to a
1523 function that does or, conversely, from a function that does return
1524 a float to a function that doesn't; the necessary stack adjustment
1525 would not be executed. */
1526 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1527 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1530 /* If this call is indirect, we'll need to be able to use a call-clobbered
1531 register for the address of the target function. Make sure that all
1532 such registers are not used for passing parameters. */
1533 if (!decl && !TARGET_64BIT)
1535 int regparm = ix86_regparm;
1538 /* We're looking at the CALL_EXPR, we need the type of the function. */
1539 type = TREE_OPERAND (exp, 0); /* pointer expression */
1540 type = TREE_TYPE (type); /* pointer type */
1541 type = TREE_TYPE (type); /* function type */
1543 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1545 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1549 /* ??? Need to count the actual number of registers to be used,
1550 not the possible number of registers. Fix later. */
1555 /* Otherwise okay. That also includes certain types of indirect calls. */
1559 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1560 arguments as in struct attribute_spec.handler. */
1562 ix86_handle_cdecl_attribute (tree *node, tree name,
1563 tree args ATTRIBUTE_UNUSED,
1564 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1566 if (TREE_CODE (*node) != FUNCTION_TYPE
1567 && TREE_CODE (*node) != METHOD_TYPE
1568 && TREE_CODE (*node) != FIELD_DECL
1569 && TREE_CODE (*node) != TYPE_DECL)
1571 warning ("`%s' attribute only applies to functions",
1572 IDENTIFIER_POINTER (name));
1573 *no_add_attrs = true;
1577 if (is_attribute_p ("fastcall", name))
1579 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1581 error ("fastcall and stdcall attributes are not compatible");
1583 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1585 error ("fastcall and regparm attributes are not compatible");
1588 else if (is_attribute_p ("stdcall", name))
1590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1592 error ("fastcall and stdcall attributes are not compatible");
1599 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1600 *no_add_attrs = true;
1606 /* Handle a "regparm" attribute;
1607 arguments as in struct attribute_spec.handler. */
1609 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1610 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1612 if (TREE_CODE (*node) != FUNCTION_TYPE
1613 && TREE_CODE (*node) != METHOD_TYPE
1614 && TREE_CODE (*node) != FIELD_DECL
1615 && TREE_CODE (*node) != TYPE_DECL)
1617 warning ("`%s' attribute only applies to functions",
1618 IDENTIFIER_POINTER (name));
1619 *no_add_attrs = true;
1625 cst = TREE_VALUE (args);
1626 if (TREE_CODE (cst) != INTEGER_CST)
1628 warning ("`%s' attribute requires an integer constant argument",
1629 IDENTIFIER_POINTER (name));
1630 *no_add_attrs = true;
1632 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1634 warning ("argument to `%s' attribute larger than %d",
1635 IDENTIFIER_POINTER (name), REGPARM_MAX);
1636 *no_add_attrs = true;
1639 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1641 error ("fastcall and regparm attributes are not compatible");
1648 /* Return 0 if the attributes for two types are incompatible, 1 if they
1649 are compatible, and 2 if they are nearly compatible (which causes a
1650 warning to be generated). */
1653 ix86_comp_type_attributes (tree type1, tree type2)
1655 /* Check for mismatch of non-default calling convention. */
1656 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1658 if (TREE_CODE (type1) != FUNCTION_TYPE)
1661 /* Check for mismatched fastcall types */
1662 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1663 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1666 /* Check for mismatched return types (cdecl vs stdcall). */
1667 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1668 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1673 /* Return the regparm value for a fuctio with the indicated TYPE. */
1676 ix86_fntype_regparm (tree type)
1680 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1682 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1684 return ix86_regparm;
1687 /* Value is the number of bytes of arguments automatically
1688 popped when returning from a subroutine call.
1689 FUNDECL is the declaration node of the function (as a tree),
1690 FUNTYPE is the data type of the function (as a tree),
1691 or for a library call it is an identifier node for the subroutine name.
1692 SIZE is the number of bytes of arguments passed on the stack.
1694 On the 80386, the RTD insn may be used to pop them if the number
1695 of args is fixed, but if the number is variable then the caller
1696 must pop them all. RTD can't be used for library calls now
1697 because the library is compiled with the Unix compiler.
1698 Use of RTD is a selectable option, since it is incompatible with
1699 standard Unix calling sequences. If the option is not selected,
1700 the caller must always pop the args.
1702 The attribute stdcall is equivalent to RTD on a per module basis. */
1705 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1707 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1709 /* Cdecl functions override -mrtd, and never pop the stack. */
1710 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1712 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1713 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1714 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1718 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1719 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1720 == void_type_node)))
1724 /* Lose any fake structure return argument if it is passed on the stack. */
1725 if (aggregate_value_p (TREE_TYPE (funtype))
1728 int nregs = ix86_fntype_regparm (funtype);
1731 return GET_MODE_SIZE (Pmode);
1737 /* Argument support functions. */
1739 /* Return true when register may be used to pass function parameters. */
1741 ix86_function_arg_regno_p (int regno)
1745 return (regno < REGPARM_MAX
1746 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1747 if (SSE_REGNO_P (regno) && TARGET_SSE)
1749 /* RAX is used as hidden argument to va_arg functions. */
1752 for (i = 0; i < REGPARM_MAX; i++)
1753 if (regno == x86_64_int_parameter_registers[i])
1758 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1759 for a call to a function whose data type is FNTYPE.
1760 For a library call, FNTYPE is 0. */
1763 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1764 tree fntype, /* tree ptr for function decl */
1765 rtx libname, /* SYMBOL_REF of library name or 0 */
1768 static CUMULATIVE_ARGS zero_cum;
1769 tree param, next_param;
1770 bool user_convention = false;
1772 if (TARGET_DEBUG_ARG)
1774 fprintf (stderr, "\ninit_cumulative_args (");
1776 fprintf (stderr, "fntype code = %s, ret code = %s",
1777 tree_code_name[(int) TREE_CODE (fntype)],
1778 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1780 fprintf (stderr, "no fntype");
1783 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1788 /* Set up the number of registers to use for passing arguments. */
1789 cum->nregs = ix86_regparm;
1790 cum->sse_nregs = SSE_REGPARM_MAX;
1791 if (fntype && !TARGET_64BIT)
1793 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1797 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1798 user_convention = true;
1801 cum->maybe_vaarg = false;
1803 /* Use ecx and edx registers if function has fastcall attribute */
1804 if (fntype && !TARGET_64BIT)
1806 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1810 user_convention = true;
1814 /* Use register calling convention for local functions when possible. */
1815 if (!TARGET_64BIT && !user_convention && fndecl
1816 && flag_unit_at_a_time)
1818 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1821 /* We can't use regparm(3) for nested functions as these use
1822 static chain pointer in third argument. */
1823 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1831 /* Determine if this function has variable arguments. This is
1832 indicated by the last argument being 'void_type_mode' if there
1833 are no variable arguments. If there are variable arguments, then
1834 we won't pass anything in registers */
1838 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1839 param != 0; param = next_param)
1841 next_param = TREE_CHAIN (param);
1842 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1849 cum->maybe_vaarg = true;
1853 if ((!fntype && !libname)
1854 || (fntype && !TYPE_ARG_TYPES (fntype)))
1855 cum->maybe_vaarg = 1;
1857 if (TARGET_DEBUG_ARG)
1858 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1863 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1864 of this code is to classify each 8bytes of incoming argument by the register
1865 class and assign registers accordingly. */
1867 /* Return the union class of CLASS1 and CLASS2.
1868 See the x86-64 PS ABI for details. */
1870 static enum x86_64_reg_class
1871 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1873 /* Rule #1: If both classes are equal, this is the resulting class. */
1874 if (class1 == class2)
1877 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1879 if (class1 == X86_64_NO_CLASS)
1881 if (class2 == X86_64_NO_CLASS)
1884 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1885 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1886 return X86_64_MEMORY_CLASS;
1888 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1889 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1890 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1891 return X86_64_INTEGERSI_CLASS;
1892 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1893 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1894 return X86_64_INTEGER_CLASS;
1896 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1897 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1898 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1899 return X86_64_MEMORY_CLASS;
1901 /* Rule #6: Otherwise class SSE is used. */
1902 return X86_64_SSE_CLASS;
1905 /* Classify the argument of type TYPE and mode MODE.
1906 CLASSES will be filled by the register class used to pass each word
1907 of the operand. The number of words is returned. In case the parameter
1908 should be passed in memory, 0 is returned. As a special case for zero
1909 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1911 BIT_OFFSET is used internally for handling records and specifies offset
1912 of the offset in bits modulo 256 to avoid overflow cases.
1914 See the x86-64 PS ABI for details.
1918 classify_argument (enum machine_mode mode, tree type,
1919 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1922 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1923 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1925 /* Variable sized entities are always passed/returned in memory. */
1929 if (mode != VOIDmode
1930 && MUST_PASS_IN_STACK (mode, type))
1933 if (type && AGGREGATE_TYPE_P (type))
1937 enum x86_64_reg_class subclasses[MAX_CLASSES];
1939 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1943 for (i = 0; i < words; i++)
1944 classes[i] = X86_64_NO_CLASS;
1946 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1947 signalize memory class, so handle it as special case. */
1950 classes[0] = X86_64_NO_CLASS;
1954 /* Classify each field of record and merge classes. */
1955 if (TREE_CODE (type) == RECORD_TYPE)
1957 /* For classes first merge in the field of the subclasses. */
1958 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1960 tree bases = TYPE_BINFO_BASETYPES (type);
1961 int n_bases = TREE_VEC_LENGTH (bases);
1964 for (i = 0; i < n_bases; ++i)
1966 tree binfo = TREE_VEC_ELT (bases, i);
1968 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1969 tree type = BINFO_TYPE (binfo);
1971 num = classify_argument (TYPE_MODE (type),
1973 (offset + bit_offset) % 256);
1976 for (i = 0; i < num; i++)
1978 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1980 merge_classes (subclasses[i], classes[i + pos]);
1984 /* And now merge the fields of structure. */
1985 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1987 if (TREE_CODE (field) == FIELD_DECL)
1991 /* Bitfields are always classified as integer. Handle them
1992 early, since later code would consider them to be
1993 misaligned integers. */
1994 if (DECL_BIT_FIELD (field))
1996 for (i = int_bit_position (field) / 8 / 8;
1997 i < (int_bit_position (field)
1998 + tree_low_cst (DECL_SIZE (field), 0)
2001 merge_classes (X86_64_INTEGER_CLASS,
2006 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2007 TREE_TYPE (field), subclasses,
2008 (int_bit_position (field)
2009 + bit_offset) % 256);
2012 for (i = 0; i < num; i++)
2015 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2017 merge_classes (subclasses[i], classes[i + pos]);
2023 /* Arrays are handled as small records. */
2024 else if (TREE_CODE (type) == ARRAY_TYPE)
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2028 TREE_TYPE (type), subclasses, bit_offset);
2032 /* The partial classes are now full classes. */
2033 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2034 subclasses[0] = X86_64_SSE_CLASS;
2035 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2036 subclasses[0] = X86_64_INTEGER_CLASS;
2038 for (i = 0; i < words; i++)
2039 classes[i] = subclasses[i % num];
2041 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2042 else if (TREE_CODE (type) == UNION_TYPE
2043 || TREE_CODE (type) == QUAL_UNION_TYPE)
2045 /* For classes first merge in the field of the subclasses. */
2046 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2048 tree bases = TYPE_BINFO_BASETYPES (type);
2049 int n_bases = TREE_VEC_LENGTH (bases);
2052 for (i = 0; i < n_bases; ++i)
2054 tree binfo = TREE_VEC_ELT (bases, i);
2056 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2057 tree type = BINFO_TYPE (binfo);
2059 num = classify_argument (TYPE_MODE (type),
2061 (offset + (bit_offset % 64)) % 256);
2064 for (i = 0; i < num; i++)
2066 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2068 merge_classes (subclasses[i], classes[i + pos]);
2072 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2074 if (TREE_CODE (field) == FIELD_DECL)
2077 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2078 TREE_TYPE (field), subclasses,
2082 for (i = 0; i < num; i++)
2083 classes[i] = merge_classes (subclasses[i], classes[i]);
2090 /* Final merger cleanup. */
2091 for (i = 0; i < words; i++)
2093 /* If one class is MEMORY, everything should be passed in
2095 if (classes[i] == X86_64_MEMORY_CLASS)
2098 /* The X86_64_SSEUP_CLASS should be always preceded by
2099 X86_64_SSE_CLASS. */
2100 if (classes[i] == X86_64_SSEUP_CLASS
2101 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2102 classes[i] = X86_64_SSE_CLASS;
2104 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2105 if (classes[i] == X86_64_X87UP_CLASS
2106 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2107 classes[i] = X86_64_SSE_CLASS;
2112 /* Compute alignment needed. We align all types to natural boundaries with
2113 exception of XFmode that is aligned to 64bits. */
2114 if (mode != VOIDmode && mode != BLKmode)
2116 int mode_alignment = GET_MODE_BITSIZE (mode);
2119 mode_alignment = 128;
2120 else if (mode == XCmode)
2121 mode_alignment = 256;
2122 /* Misaligned fields are always returned in memory. */
2123 if (bit_offset % mode_alignment)
2127 /* Classification of atomic types. */
2137 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2138 classes[0] = X86_64_INTEGERSI_CLASS;
2140 classes[0] = X86_64_INTEGER_CLASS;
2144 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2147 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2148 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2151 if (!(bit_offset % 64))
2152 classes[0] = X86_64_SSESF_CLASS;
2154 classes[0] = X86_64_SSE_CLASS;
2157 classes[0] = X86_64_SSEDF_CLASS;
2160 classes[0] = X86_64_X87_CLASS;
2161 classes[1] = X86_64_X87UP_CLASS;
2164 classes[0] = X86_64_X87_CLASS;
2165 classes[1] = X86_64_X87UP_CLASS;
2166 classes[2] = X86_64_X87_CLASS;
2167 classes[3] = X86_64_X87UP_CLASS;
2170 classes[0] = X86_64_SSEDF_CLASS;
2171 classes[1] = X86_64_SSEDF_CLASS;
2174 classes[0] = X86_64_SSE_CLASS;
2182 classes[0] = X86_64_SSE_CLASS;
2183 classes[1] = X86_64_SSEUP_CLASS;
2198 /* Examine the argument and return set number of register required in each
2199 class. Return 0 iff parameter should be passed in memory. */
2201 examine_argument (enum machine_mode mode, tree type, int in_return,
2202 int *int_nregs, int *sse_nregs)
2204 enum x86_64_reg_class class[MAX_CLASSES];
2205 int n = classify_argument (mode, type, class, 0);
2211 for (n--; n >= 0; n--)
2214 case X86_64_INTEGER_CLASS:
2215 case X86_64_INTEGERSI_CLASS:
2218 case X86_64_SSE_CLASS:
2219 case X86_64_SSESF_CLASS:
2220 case X86_64_SSEDF_CLASS:
2223 case X86_64_NO_CLASS:
2224 case X86_64_SSEUP_CLASS:
2226 case X86_64_X87_CLASS:
2227 case X86_64_X87UP_CLASS:
2231 case X86_64_MEMORY_CLASS:
2236 /* Construct container for the argument used by GCC interface. See
2237 FUNCTION_ARG for the detailed description. */
2239 construct_container (enum machine_mode mode, tree type, int in_return,
2240 int nintregs, int nsseregs, const int * intreg,
2243 enum machine_mode tmpmode;
2245 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2246 enum x86_64_reg_class class[MAX_CLASSES];
2250 int needed_sseregs, needed_intregs;
2251 rtx exp[MAX_CLASSES];
2254 n = classify_argument (mode, type, class, 0);
2255 if (TARGET_DEBUG_ARG)
2258 fprintf (stderr, "Memory class\n");
2261 fprintf (stderr, "Classes:");
2262 for (i = 0; i < n; i++)
2264 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2266 fprintf (stderr, "\n");
2271 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2273 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2276 /* First construct simple cases. Avoid SCmode, since we want to use
2277 single register to pass this type. */
2278 if (n == 1 && mode != SCmode)
2281 case X86_64_INTEGER_CLASS:
2282 case X86_64_INTEGERSI_CLASS:
2283 return gen_rtx_REG (mode, intreg[0]);
2284 case X86_64_SSE_CLASS:
2285 case X86_64_SSESF_CLASS:
2286 case X86_64_SSEDF_CLASS:
2287 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2288 case X86_64_X87_CLASS:
2289 return gen_rtx_REG (mode, FIRST_STACK_REG);
2290 case X86_64_NO_CLASS:
2291 /* Zero sized array, struct or class. */
2296 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2297 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2299 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2300 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2301 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2302 && class[1] == X86_64_INTEGER_CLASS
2303 && (mode == CDImode || mode == TImode)
2304 && intreg[0] + 1 == intreg[1])
2305 return gen_rtx_REG (mode, intreg[0]);
2307 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2308 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2309 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2311 /* Otherwise figure out the entries of the PARALLEL. */
2312 for (i = 0; i < n; i++)
2316 case X86_64_NO_CLASS:
2318 case X86_64_INTEGER_CLASS:
2319 case X86_64_INTEGERSI_CLASS:
2320 /* Merge TImodes on aligned occasions here too. */
2321 if (i * 8 + 8 > bytes)
2322 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2323 else if (class[i] == X86_64_INTEGERSI_CLASS)
2327 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2328 if (tmpmode == BLKmode)
2330 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2331 gen_rtx_REG (tmpmode, *intreg),
2335 case X86_64_SSESF_CLASS:
2336 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2337 gen_rtx_REG (SFmode,
2338 SSE_REGNO (sse_regno)),
2342 case X86_64_SSEDF_CLASS:
2343 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2344 gen_rtx_REG (DFmode,
2345 SSE_REGNO (sse_regno)),
2349 case X86_64_SSE_CLASS:
2350 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2354 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2355 gen_rtx_REG (tmpmode,
2356 SSE_REGNO (sse_regno)),
2358 if (tmpmode == TImode)
2366 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2367 for (i = 0; i < nexps; i++)
2368 XVECEXP (ret, 0, i) = exp [i];
2372 /* Update the data in CUM to advance over an argument
2373 of mode MODE and data type TYPE.
2374 (TYPE is null for libcalls where that information may not be available.) */
2377 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2378 enum machine_mode mode, /* current arg mode */
2379 tree type, /* type of the argument or 0 if lib support */
2380 int named) /* whether or not the argument was named */
2383 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2384 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2386 if (TARGET_DEBUG_ARG)
2388 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2389 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2392 int int_nregs, sse_nregs;
2393 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2394 cum->words += words;
2395 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2397 cum->nregs -= int_nregs;
2398 cum->sse_nregs -= sse_nregs;
2399 cum->regno += int_nregs;
2400 cum->sse_regno += sse_nregs;
2403 cum->words += words;
2407 if (TARGET_SSE && mode == TImode)
2409 cum->sse_words += words;
2410 cum->sse_nregs -= 1;
2411 cum->sse_regno += 1;
2412 if (cum->sse_nregs <= 0)
2420 cum->words += words;
2421 cum->nregs -= words;
2422 cum->regno += words;
2424 if (cum->nregs <= 0)
2434 /* Define where to put the arguments to a function.
2435 Value is zero to push the argument on the stack,
2436 or a hard register in which to store the argument.
2438 MODE is the argument's machine mode.
2439 TYPE is the data type of the argument (as a tree).
2440 This is null for libcalls where that information may
2442 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2443 the preceding args and about the function being called.
2444 NAMED is nonzero if this argument is a named parameter
2445 (otherwise it is an extra parameter matching an ellipsis). */
2448 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2449 enum machine_mode mode, /* current arg mode */
2450 tree type, /* type of the argument or 0 if lib support */
2451 int named) /* != 0 for normal args, == 0 for ... args */
2455 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2456 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2458 /* Handle a hidden AL argument containing number of registers for varargs
2459 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2461 if (mode == VOIDmode)
2464 return GEN_INT (cum->maybe_vaarg
2465 ? (cum->sse_nregs < 0
2473 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2474 &x86_64_int_parameter_registers [cum->regno],
2479 /* For now, pass fp/complex values on the stack. */
2491 if (words <= cum->nregs)
2493 int regno = cum->regno;
2495 /* Fastcall allocates the first two DWORD (SImode) or
2496 smaller arguments to ECX and EDX. */
2499 if (mode == BLKmode || mode == DImode)
2502 /* ECX not EAX is the first allocated register. */
2506 ret = gen_rtx_REG (mode, regno);
2511 ret = gen_rtx_REG (mode, cum->sse_regno);
2515 if (TARGET_DEBUG_ARG)
2518 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2519 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2522 print_simple_rtl (stderr, ret);
2524 fprintf (stderr, ", stack");
2526 fprintf (stderr, " )\n");
2532 /* A C expression that indicates when an argument must be passed by
2533 reference. If nonzero for an argument, a copy of that argument is
2534 made in memory and a pointer to the argument is passed instead of
2535 the argument itself. The pointer is passed in whatever way is
2536 appropriate for passing a pointer to that type. */
2539 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2540 enum machine_mode mode ATTRIBUTE_UNUSED,
2541 tree type, int named ATTRIBUTE_UNUSED)
2546 if (type && int_size_in_bytes (type) == -1)
2548 if (TARGET_DEBUG_ARG)
2549 fprintf (stderr, "function_arg_pass_by_reference\n");
2556 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2559 contains_128bit_aligned_vector_p (tree type)
2561 enum machine_mode mode = TYPE_MODE (type);
2562 if (SSE_REG_MODE_P (mode)
2563 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2565 if (TYPE_ALIGN (type) < 128)
2568 if (AGGREGATE_TYPE_P (type))
2570 /* Walk the aggregates recursively. */
2571 if (TREE_CODE (type) == RECORD_TYPE
2572 || TREE_CODE (type) == UNION_TYPE
2573 || TREE_CODE (type) == QUAL_UNION_TYPE)
2577 if (TYPE_BINFO (type) != NULL
2578 && TYPE_BINFO_BASETYPES (type) != NULL)
2580 tree bases = TYPE_BINFO_BASETYPES (type);
2581 int n_bases = TREE_VEC_LENGTH (bases);
2584 for (i = 0; i < n_bases; ++i)
2586 tree binfo = TREE_VEC_ELT (bases, i);
2587 tree type = BINFO_TYPE (binfo);
2589 if (contains_128bit_aligned_vector_p (type))
2593 /* And now merge the fields of structure. */
2594 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2596 if (TREE_CODE (field) == FIELD_DECL
2597 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2601 /* Just for use if some languages passes arrays by value. */
2602 else if (TREE_CODE (type) == ARRAY_TYPE)
2604 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2613 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2617 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2621 align = TYPE_ALIGN (type);
2623 align = GET_MODE_ALIGNMENT (mode);
2624 if (align < PARM_BOUNDARY)
2625 align = PARM_BOUNDARY;
2628 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2629 make an exception for SSE modes since these require 128bit
2632 The handling here differs from field_alignment. ICC aligns MMX
2633 arguments to 4 byte boundaries, while structure fields are aligned
2634 to 8 byte boundaries. */
2637 if (!SSE_REG_MODE_P (mode))
2638 align = PARM_BOUNDARY;
2642 if (!contains_128bit_aligned_vector_p (type))
2643 align = PARM_BOUNDARY;
2645 if (align != PARM_BOUNDARY && !TARGET_SSE)
2653 /* Return true if N is a possible register number of function value. */
2655 ix86_function_value_regno_p (int regno)
2659 return ((regno) == 0
2660 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2661 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2663 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2664 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2665 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2668 /* Define how to find the value returned by a function.
2669 VALTYPE is the data type of the value (as a tree).
2670 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2671 otherwise, FUNC is 0. */
2673 ix86_function_value (tree valtype)
2677 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2678 REGPARM_MAX, SSE_REGPARM_MAX,
2679 x86_64_int_return_registers, 0);
2680 /* For zero sized structures, construct_container return NULL, but we need
2681 to keep rest of compiler happy by returning meaningful value. */
2683 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2687 return gen_rtx_REG (TYPE_MODE (valtype),
2688 ix86_value_regno (TYPE_MODE (valtype)));
2691 /* Return false iff type is returned in memory. */
2693 ix86_return_in_memory (tree type)
2695 int needed_intregs, needed_sseregs;
2698 return !examine_argument (TYPE_MODE (type), type, 1,
2699 &needed_intregs, &needed_sseregs);
2703 if (TYPE_MODE (type) == BLKmode)
2705 else if (MS_AGGREGATE_RETURN
2706 && AGGREGATE_TYPE_P (type)
2707 && int_size_in_bytes(type) <= 8)
2709 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2710 && int_size_in_bytes (type) == 8)
2711 || (int_size_in_bytes (type) > 12
2712 && TYPE_MODE (type) != TImode
2713 && TYPE_MODE (type) != TFmode
2714 && !VECTOR_MODE_P (TYPE_MODE (type))))
2720 /* Define how to find the value returned by a library function
2721 assuming the value has mode MODE. */
2723 ix86_libcall_value (enum machine_mode mode)
2733 return gen_rtx_REG (mode, FIRST_SSE_REG);
2736 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2738 return gen_rtx_REG (mode, 0);
2742 return gen_rtx_REG (mode, ix86_value_regno (mode));
2745 /* Given a mode, return the register to use for a return value. */
2748 ix86_value_regno (enum machine_mode mode)
2750 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2751 return FIRST_FLOAT_REG;
2752 if (mode == TImode || VECTOR_MODE_P (mode))
2753 return FIRST_SSE_REG;
2757 /* Create the va_list data type. */
2760 ix86_build_va_list (void)
2762 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2764 /* For i386 we use plain pointer to argument area. */
2766 return build_pointer_type (char_type_node);
2768 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2769 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2771 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2772 unsigned_type_node);
2773 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2774 unsigned_type_node);
2775 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2777 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2780 DECL_FIELD_CONTEXT (f_gpr) = record;
2781 DECL_FIELD_CONTEXT (f_fpr) = record;
2782 DECL_FIELD_CONTEXT (f_ovf) = record;
2783 DECL_FIELD_CONTEXT (f_sav) = record;
2785 TREE_CHAIN (record) = type_decl;
2786 TYPE_NAME (record) = type_decl;
2787 TYPE_FIELDS (record) = f_gpr;
2788 TREE_CHAIN (f_gpr) = f_fpr;
2789 TREE_CHAIN (f_fpr) = f_ovf;
2790 TREE_CHAIN (f_ovf) = f_sav;
2792 layout_type (record);
2794 /* The correct type is an array type of one element. */
2795 return build_array_type (record, build_index_type (size_zero_node));
2798 /* Perform any needed actions needed for a function that is receiving a
2799 variable number of arguments.
2803 MODE and TYPE are the mode and type of the current parameter.
2805 PRETEND_SIZE is a variable that should be set to the amount of stack
2806 that must be pushed by the prolog to pretend that our caller pushed
2809 Normally, this macro will push all remaining incoming registers on the
2810 stack and set PRETEND_SIZE to the length of the registers pushed. */
2813 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2814 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2817 CUMULATIVE_ARGS next_cum;
2818 rtx save_area = NULL_RTX, mem;
2831 /* Indicate to allocate space on the stack for varargs save area. */
2832 ix86_save_varrargs_registers = 1;
2834 cfun->stack_alignment_needed = 128;
2836 fntype = TREE_TYPE (current_function_decl);
2837 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2838 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2839 != void_type_node));
2841 /* For varargs, we do not want to skip the dummy va_dcl argument.
2842 For stdargs, we do want to skip the last named argument. */
2845 function_arg_advance (&next_cum, mode, type, 1);
2848 save_area = frame_pointer_rtx;
2850 set = get_varargs_alias_set ();
2852 for (i = next_cum.regno; i < ix86_regparm; i++)
2854 mem = gen_rtx_MEM (Pmode,
2855 plus_constant (save_area, i * UNITS_PER_WORD));
2856 set_mem_alias_set (mem, set);
2857 emit_move_insn (mem, gen_rtx_REG (Pmode,
2858 x86_64_int_parameter_registers[i]));
2861 if (next_cum.sse_nregs)
2863 /* Now emit code to save SSE registers. The AX parameter contains number
2864 of SSE parameter registers used to call this function. We use
2865 sse_prologue_save insn template that produces computed jump across
2866 SSE saves. We need some preparation work to get this working. */
2868 label = gen_label_rtx ();
2869 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2871 /* Compute address to jump to :
2872 label - 5*eax + nnamed_sse_arguments*5 */
2873 tmp_reg = gen_reg_rtx (Pmode);
2874 nsse_reg = gen_reg_rtx (Pmode);
2875 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2876 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2877 gen_rtx_MULT (Pmode, nsse_reg,
2879 if (next_cum.sse_regno)
2882 gen_rtx_CONST (DImode,
2883 gen_rtx_PLUS (DImode,
2885 GEN_INT (next_cum.sse_regno * 4))));
2887 emit_move_insn (nsse_reg, label_ref);
2888 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2890 /* Compute address of memory block we save into. We always use pointer
2891 pointing 127 bytes after first byte to store - this is needed to keep
2892 instruction size limited by 4 bytes. */
2893 tmp_reg = gen_reg_rtx (Pmode);
2894 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2895 plus_constant (save_area,
2896 8 * REGPARM_MAX + 127)));
2897 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2898 set_mem_alias_set (mem, set);
2899 set_mem_align (mem, BITS_PER_WORD);
2901 /* And finally do the dirty job! */
2902 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2903 GEN_INT (next_cum.sse_regno), label));
2908 /* Implement va_start. */
2911 ix86_va_start (tree valist, rtx nextarg)
2913 HOST_WIDE_INT words, n_gpr, n_fpr;
2914 tree f_gpr, f_fpr, f_ovf, f_sav;
2915 tree gpr, fpr, ovf, sav, t;
2917 /* Only 64bit target needs something special. */
2920 std_expand_builtin_va_start (valist, nextarg);
2924 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2925 f_fpr = TREE_CHAIN (f_gpr);
2926 f_ovf = TREE_CHAIN (f_fpr);
2927 f_sav = TREE_CHAIN (f_ovf);
2929 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2930 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2931 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2932 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2933 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2935 /* Count number of gp and fp argument registers used. */
2936 words = current_function_args_info.words;
2937 n_gpr = current_function_args_info.regno;
2938 n_fpr = current_function_args_info.sse_regno;
2940 if (TARGET_DEBUG_ARG)
2941 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2942 (int) words, (int) n_gpr, (int) n_fpr);
2944 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2945 build_int_2 (n_gpr * 8, 0));
2946 TREE_SIDE_EFFECTS (t) = 1;
2947 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2949 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2950 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2951 TREE_SIDE_EFFECTS (t) = 1;
2952 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2954 /* Find the overflow area. */
2955 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2957 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2958 build_int_2 (words * UNITS_PER_WORD, 0));
2959 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2960 TREE_SIDE_EFFECTS (t) = 1;
2961 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2963 /* Find the register save area.
2964 Prologue of the function save it right above stack frame. */
2965 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2966 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2967 TREE_SIDE_EFFECTS (t) = 1;
2968 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2971 /* Implement va_arg. */
2973 ix86_va_arg (tree valist, tree type)
2975 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2976 tree f_gpr, f_fpr, f_ovf, f_sav;
2977 tree gpr, fpr, ovf, sav, t;
2979 rtx lab_false, lab_over = NULL_RTX;
2984 /* Only 64bit target needs something special. */
2987 return std_expand_builtin_va_arg (valist, type);
2990 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2991 f_fpr = TREE_CHAIN (f_gpr);
2992 f_ovf = TREE_CHAIN (f_fpr);
2993 f_sav = TREE_CHAIN (f_ovf);
2995 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2996 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2997 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2998 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2999 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3001 size = int_size_in_bytes (type);
3004 /* Passed by reference. */
3006 type = build_pointer_type (type);
3007 size = int_size_in_bytes (type);
3009 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3011 container = construct_container (TYPE_MODE (type), type, 0,
3012 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3014 * Pull the value out of the saved registers ...
3017 addr_rtx = gen_reg_rtx (Pmode);
3021 rtx int_addr_rtx, sse_addr_rtx;
3022 int needed_intregs, needed_sseregs;
3025 lab_over = gen_label_rtx ();
3026 lab_false = gen_label_rtx ();
3028 examine_argument (TYPE_MODE (type), type, 0,
3029 &needed_intregs, &needed_sseregs);
3032 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3033 || TYPE_ALIGN (type) > 128);
3035 /* In case we are passing structure, verify that it is consecutive block
3036 on the register save area. If not we need to do moves. */
3037 if (!need_temp && !REG_P (container))
3039 /* Verify that all registers are strictly consecutive */
3040 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3044 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3046 rtx slot = XVECEXP (container, 0, i);
3047 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3048 || INTVAL (XEXP (slot, 1)) != i * 16)
3056 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3058 rtx slot = XVECEXP (container, 0, i);
3059 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3060 || INTVAL (XEXP (slot, 1)) != i * 8)
3067 int_addr_rtx = addr_rtx;
3068 sse_addr_rtx = addr_rtx;
3072 int_addr_rtx = gen_reg_rtx (Pmode);
3073 sse_addr_rtx = gen_reg_rtx (Pmode);
3075 /* First ensure that we fit completely in registers. */
3078 emit_cmp_and_jump_insns (expand_expr
3079 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3080 GEN_INT ((REGPARM_MAX - needed_intregs +
3081 1) * 8), GE, const1_rtx, SImode,
3086 emit_cmp_and_jump_insns (expand_expr
3087 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3088 GEN_INT ((SSE_REGPARM_MAX -
3089 needed_sseregs + 1) * 16 +
3090 REGPARM_MAX * 8), GE, const1_rtx,
3091 SImode, 1, lab_false);
3094 /* Compute index to start of area used for integer regs. */
3097 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3098 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3099 if (r != int_addr_rtx)
3100 emit_move_insn (int_addr_rtx, r);
3104 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3105 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3106 if (r != sse_addr_rtx)
3107 emit_move_insn (sse_addr_rtx, r);
3115 /* Never use the memory itself, as it has the alias set. */
3116 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3117 mem = gen_rtx_MEM (BLKmode, x);
3118 force_operand (x, addr_rtx);
3119 set_mem_alias_set (mem, get_varargs_alias_set ());
3120 set_mem_align (mem, BITS_PER_UNIT);
3122 for (i = 0; i < XVECLEN (container, 0); i++)
3124 rtx slot = XVECEXP (container, 0, i);
3125 rtx reg = XEXP (slot, 0);
3126 enum machine_mode mode = GET_MODE (reg);
3132 if (SSE_REGNO_P (REGNO (reg)))
3134 src_addr = sse_addr_rtx;
3135 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3139 src_addr = int_addr_rtx;
3140 src_offset = REGNO (reg) * 8;
3142 src_mem = gen_rtx_MEM (mode, src_addr);
3143 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3144 src_mem = adjust_address (src_mem, mode, src_offset);
3145 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3146 emit_move_insn (dest_mem, src_mem);
3153 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3154 build_int_2 (needed_intregs * 8, 0));
3155 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3156 TREE_SIDE_EFFECTS (t) = 1;
3157 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3162 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3163 build_int_2 (needed_sseregs * 16, 0));
3164 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3165 TREE_SIDE_EFFECTS (t) = 1;
3166 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3169 emit_jump_insn (gen_jump (lab_over));
3171 emit_label (lab_false);
3174 /* ... otherwise out of the overflow area. */
3176 /* Care for on-stack alignment if needed. */
3177 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3181 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3182 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3183 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3187 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3189 emit_move_insn (addr_rtx, r);
3192 build (PLUS_EXPR, TREE_TYPE (t), t,
3193 build_int_2 (rsize * UNITS_PER_WORD, 0));
3194 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3195 TREE_SIDE_EFFECTS (t) = 1;
3196 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3199 emit_label (lab_over);
3203 r = gen_rtx_MEM (Pmode, addr_rtx);
3204 set_mem_alias_set (r, get_varargs_alias_set ());
3205 emit_move_insn (addr_rtx, r);
3211 /* Return nonzero if OP is either a i387 or SSE fp register. */
3213 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3215 return ANY_FP_REG_P (op);
3218 /* Return nonzero if OP is an i387 fp register. */
3220 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3222 return FP_REG_P (op);
3225 /* Return nonzero if OP is a non-fp register_operand. */
3227 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3229 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3232 /* Return nonzero if OP is a register operand other than an
3233 i387 fp register. */
3235 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3237 return register_operand (op, mode) && !FP_REG_P (op);
3240 /* Return nonzero if OP is general operand representable on x86_64. */
3243 x86_64_general_operand (rtx op, enum machine_mode mode)
3246 return general_operand (op, mode);
3247 if (nonimmediate_operand (op, mode))
3249 return x86_64_sign_extended_value (op);
3252 /* Return nonzero if OP is general operand representable on x86_64
3253 as either sign extended or zero extended constant. */
3256 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3259 return general_operand (op, mode);
3260 if (nonimmediate_operand (op, mode))
3262 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3265 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3268 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3271 return nonmemory_operand (op, mode);
3272 if (register_operand (op, mode))
3274 return x86_64_sign_extended_value (op);
3277 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3280 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3282 if (!TARGET_64BIT || !flag_pic)
3283 return nonmemory_operand (op, mode);
3284 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3286 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3291 /* Return nonzero if OPNUM's MEM should be matched
3292 in movabs* patterns. */
3295 ix86_check_movabs (rtx insn, int opnum)
3299 set = PATTERN (insn);
3300 if (GET_CODE (set) == PARALLEL)
3301 set = XVECEXP (set, 0, 0);
3302 if (GET_CODE (set) != SET)
3304 mem = XEXP (set, opnum);
3305 while (GET_CODE (mem) == SUBREG)
3306 mem = SUBREG_REG (mem);
3307 if (GET_CODE (mem) != MEM)
3309 return (volatile_ok || !MEM_VOLATILE_P (mem));
3312 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3315 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3318 return nonmemory_operand (op, mode);
3319 if (register_operand (op, mode))
3321 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3324 /* Return nonzero if OP is immediate operand representable on x86_64. */
3327 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3330 return immediate_operand (op, mode);
3331 return x86_64_sign_extended_value (op);
3334 /* Return nonzero if OP is immediate operand representable on x86_64. */
3337 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3339 return x86_64_zero_extended_value (op);
3342 /* Return nonzero if OP is (const_int 1), else return zero. */
3345 const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3347 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3350 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3351 for shift & compare patterns, as shifting by 0 does not change flags),
3352 else return zero. */
3355 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3357 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3360 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3361 reference and a constant. */
3364 symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3366 switch (GET_CODE (op))
3374 if (GET_CODE (op) == SYMBOL_REF
3375 || GET_CODE (op) == LABEL_REF
3376 || (GET_CODE (op) == UNSPEC
3377 && (XINT (op, 1) == UNSPEC_GOT
3378 || XINT (op, 1) == UNSPEC_GOTOFF
3379 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3381 if (GET_CODE (op) != PLUS
3382 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3386 if (GET_CODE (op) == SYMBOL_REF
3387 || GET_CODE (op) == LABEL_REF)
3389 /* Only @GOTOFF gets offsets. */
3390 if (GET_CODE (op) != UNSPEC
3391 || XINT (op, 1) != UNSPEC_GOTOFF)
3394 op = XVECEXP (op, 0, 0);
3395 if (GET_CODE (op) == SYMBOL_REF
3396 || GET_CODE (op) == LABEL_REF)
3405 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3408 pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3410 if (GET_CODE (op) != CONST)
3415 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3420 if (GET_CODE (op) == UNSPEC)
3422 if (GET_CODE (op) != PLUS
3423 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3426 if (GET_CODE (op) == UNSPEC)
3432 /* Return true if OP is a symbolic operand that resolves locally. */
3435 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3437 if (GET_CODE (op) == CONST
3438 && GET_CODE (XEXP (op, 0)) == PLUS
3439 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3440 op = XEXP (XEXP (op, 0), 0);
3442 if (GET_CODE (op) == LABEL_REF)
3445 if (GET_CODE (op) != SYMBOL_REF)
3448 if (SYMBOL_REF_LOCAL_P (op))
3451 /* There is, however, a not insubstantial body of code in the rest of
3452 the compiler that assumes it can just stick the results of
3453 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3454 /* ??? This is a hack. Should update the body of the compiler to
3455 always create a DECL an invoke targetm.encode_section_info. */
3456 if (strncmp (XSTR (op, 0), internal_label_prefix,
3457 internal_label_prefix_len) == 0)
3463 /* Test for various thread-local symbols. */
3466 tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3468 if (GET_CODE (op) != SYMBOL_REF)
3470 return SYMBOL_REF_TLS_MODEL (op);
3474 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3476 if (GET_CODE (op) != SYMBOL_REF)
3478 return SYMBOL_REF_TLS_MODEL (op) == kind;
3482 global_dynamic_symbolic_operand (register rtx op,
3483 enum machine_mode mode ATTRIBUTE_UNUSED)
3485 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3489 local_dynamic_symbolic_operand (register rtx op,
3490 enum machine_mode mode ATTRIBUTE_UNUSED)
3492 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3496 initial_exec_symbolic_operand (register rtx op,
3497 enum machine_mode mode ATTRIBUTE_UNUSED)
3499 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3503 local_exec_symbolic_operand (register rtx op,
3504 enum machine_mode mode ATTRIBUTE_UNUSED)
3506 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3509 /* Test for a valid operand for a call instruction. Don't allow the
3510 arg pointer register or virtual regs since they may decay into
3511 reg + const, which the patterns can't handle. */
3514 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3516 /* Disallow indirect through a virtual register. This leads to
3517 compiler aborts when trying to eliminate them. */
3518 if (GET_CODE (op) == REG
3519 && (op == arg_pointer_rtx
3520 || op == frame_pointer_rtx
3521 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3522 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3525 /* Disallow `call 1234'. Due to varying assembler lameness this
3526 gets either rejected or translated to `call .+1234'. */
3527 if (GET_CODE (op) == CONST_INT)
3530 /* Explicitly allow SYMBOL_REF even if pic. */
3531 if (GET_CODE (op) == SYMBOL_REF)
3534 /* Otherwise we can allow any general_operand in the address. */
3535 return general_operand (op, Pmode);
3538 /* Test for a valid operand for a call instruction. Don't allow the
3539 arg pointer register or virtual regs since they may decay into
3540 reg + const, which the patterns can't handle. */
3543 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3545 /* Disallow indirect through a virtual register. This leads to
3546 compiler aborts when trying to eliminate them. */
3547 if (GET_CODE (op) == REG
3548 && (op == arg_pointer_rtx
3549 || op == frame_pointer_rtx
3550 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3551 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3554 /* Explicitly allow SYMBOL_REF even if pic. */
3555 if (GET_CODE (op) == SYMBOL_REF)
3558 /* Otherwise we can only allow register operands. */
3559 return register_operand (op, Pmode);
3563 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3565 if (GET_CODE (op) == CONST
3566 && GET_CODE (XEXP (op, 0)) == PLUS
3567 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3568 op = XEXP (XEXP (op, 0), 0);
3569 return GET_CODE (op) == SYMBOL_REF;
3572 /* Match exactly zero and one. */
3575 const0_operand (register rtx op, enum machine_mode mode)
3577 return op == CONST0_RTX (mode);
3581 const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3583 return op == const1_rtx;
3586 /* Match 2, 4, or 8. Used for leal multiplicands. */
3589 const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3591 return (GET_CODE (op) == CONST_INT
3592 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3595 /* True if this is a constant appropriate for an increment or decrement. */
3598 incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3600 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3601 registers, since carry flag is not set. */
3602 if (TARGET_PENTIUM4 && !optimize_size)
3604 return op == const1_rtx || op == constm1_rtx;
3607 /* Return nonzero if OP is acceptable as operand of DImode shift
3611 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3614 return nonimmediate_operand (op, mode);
3616 return register_operand (op, mode);
3619 /* Return false if this is the stack pointer, or any other fake
3620 register eliminable to the stack pointer. Otherwise, this is
3623 This is used to prevent esp from being used as an index reg.
3624 Which would only happen in pathological cases. */
3627 reg_no_sp_operand (register rtx op, enum machine_mode mode)
3630 if (GET_CODE (t) == SUBREG)
3632 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3635 return register_operand (op, mode);
3639 mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3641 return MMX_REG_P (op);
3644 /* Return false if this is any eliminable register. Otherwise
3648 general_no_elim_operand (register rtx op, enum machine_mode mode)
3651 if (GET_CODE (t) == SUBREG)
3653 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3654 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3655 || t == virtual_stack_dynamic_rtx)
3658 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3659 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3662 return general_operand (op, mode);
3665 /* Return false if this is any eliminable register. Otherwise
3666 register_operand or const_int. */
3669 nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
3672 if (GET_CODE (t) == SUBREG)
3674 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3675 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3676 || t == virtual_stack_dynamic_rtx)
3679 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3682 /* Return false if this is any eliminable register or stack register,
3683 otherwise work like register_operand. */
3686 index_register_operand (register rtx op, enum machine_mode mode)
3689 if (GET_CODE (t) == SUBREG)
3693 if (t == arg_pointer_rtx
3694 || t == frame_pointer_rtx
3695 || t == virtual_incoming_args_rtx
3696 || t == virtual_stack_vars_rtx
3697 || t == virtual_stack_dynamic_rtx
3698 || REGNO (t) == STACK_POINTER_REGNUM)
3701 return general_operand (op, mode);
3704 /* Return true if op is a Q_REGS class register. */
3707 q_regs_operand (register rtx op, enum machine_mode mode)
3709 if (mode != VOIDmode && GET_MODE (op) != mode)
3711 if (GET_CODE (op) == SUBREG)
3712 op = SUBREG_REG (op);
3713 return ANY_QI_REG_P (op);
3716 /* Return true if op is an flags register. */
3719 flags_reg_operand (register rtx op, enum machine_mode mode)
3721 if (mode != VOIDmode && GET_MODE (op) != mode)
3723 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3726 /* Return true if op is a NON_Q_REGS class register. */
3729 non_q_regs_operand (register rtx op, enum machine_mode mode)
3731 if (mode != VOIDmode && GET_MODE (op) != mode)
3733 if (GET_CODE (op) == SUBREG)
3734 op = SUBREG_REG (op);
3735 return NON_QI_REG_P (op);
3739 zero_extended_scalar_load_operand (rtx op,
3740 enum machine_mode mode ATTRIBUTE_UNUSED)
3743 if (GET_CODE (op) != MEM)
3745 op = maybe_get_pool_constant (op);
3748 if (GET_CODE (op) != CONST_VECTOR)
3751 (GET_MODE_SIZE (GET_MODE (op)) /
3752 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3753 for (n_elts--; n_elts > 0; n_elts--)
3755 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3756 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3762 /* Return 1 when OP is operand acceptable for standard SSE move. */
3764 vector_move_operand (rtx op, enum machine_mode mode)
3766 if (nonimmediate_operand (op, mode))
3768 if (GET_MODE (op) != mode && mode != VOIDmode)
3770 return (op == CONST0_RTX (GET_MODE (op)));
3773 /* Return true if op if a valid address, and does not contain
3774 a segment override. */
3777 no_seg_address_operand (register rtx op, enum machine_mode mode)
3779 struct ix86_address parts;
3781 if (! address_operand (op, mode))
3784 if (! ix86_decompose_address (op, &parts))
3787 return parts.seg == SEG_DEFAULT;
3790 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3793 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3795 enum rtx_code code = GET_CODE (op);
3798 /* Operations supported directly. */
3808 /* These are equivalent to ones above in non-IEEE comparisons. */
3815 return !TARGET_IEEE_FP;
3820 /* Return 1 if OP is a valid comparison operator in valid mode. */
3822 ix86_comparison_operator (register rtx op, enum machine_mode mode)
3824 enum machine_mode inmode;
3825 enum rtx_code code = GET_CODE (op);
3826 if (mode != VOIDmode && GET_MODE (op) != mode)
3828 if (GET_RTX_CLASS (code) != '<')
3830 inmode = GET_MODE (XEXP (op, 0));
3832 if (inmode == CCFPmode || inmode == CCFPUmode)
3834 enum rtx_code second_code, bypass_code;
3835 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3836 return (bypass_code == NIL && second_code == NIL);
3843 if (inmode == CCmode || inmode == CCGCmode
3844 || inmode == CCGOCmode || inmode == CCNOmode)
3847 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3848 if (inmode == CCmode)
3852 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3860 /* Return 1 if OP is a valid comparison operator testing carry flag
3863 ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
3865 enum machine_mode inmode;
3866 enum rtx_code code = GET_CODE (op);
3868 if (mode != VOIDmode && GET_MODE (op) != mode)
3870 if (GET_RTX_CLASS (code) != '<')
3872 inmode = GET_MODE (XEXP (op, 0));
3873 if (GET_CODE (XEXP (op, 0)) != REG
3874 || REGNO (XEXP (op, 0)) != 17
3875 || XEXP (op, 1) != const0_rtx)
3878 if (inmode == CCFPmode || inmode == CCFPUmode)
3880 enum rtx_code second_code, bypass_code;
3882 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3883 if (bypass_code != NIL || second_code != NIL)
3885 code = ix86_fp_compare_code_to_integer (code);
3887 else if (inmode != CCmode)
3892 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3895 fcmov_comparison_operator (register rtx op, enum machine_mode mode)
3897 enum machine_mode inmode;
3898 enum rtx_code code = GET_CODE (op);
3900 if (mode != VOIDmode && GET_MODE (op) != mode)
3902 if (GET_RTX_CLASS (code) != '<')
3904 inmode = GET_MODE (XEXP (op, 0));
3905 if (inmode == CCFPmode || inmode == CCFPUmode)
3907 enum rtx_code second_code, bypass_code;
3909 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3910 if (bypass_code != NIL || second_code != NIL)
3912 code = ix86_fp_compare_code_to_integer (code);
3914 /* i387 supports just limited amount of conditional codes. */
3917 case LTU: case GTU: case LEU: case GEU:
3918 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3921 case ORDERED: case UNORDERED:
3929 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3932 promotable_binary_operator (register rtx op,
3933 enum machine_mode mode ATTRIBUTE_UNUSED)
3935 switch (GET_CODE (op))
3938 /* Modern CPUs have same latency for HImode and SImode multiply,
3939 but 386 and 486 do HImode multiply faster. */
3940 return ix86_tune > PROCESSOR_I486;
3952 /* Nearly general operand, but accept any const_double, since we wish
3953 to be able to drop them into memory rather than have them get pulled
3957 cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
3959 if (mode != VOIDmode && mode != GET_MODE (op))
3961 if (GET_CODE (op) == CONST_DOUBLE)
3963 return general_operand (op, mode);
3966 /* Match an SI or HImode register for a zero_extract. */
3969 ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3972 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3973 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3976 if (!register_operand (op, VOIDmode))
3979 /* Be careful to accept only registers having upper parts. */
3980 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3981 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3984 /* Return 1 if this is a valid binary floating-point operation.
3985 OP is the expression matched, and MODE is its mode. */
3988 binary_fp_operator (register rtx op, enum machine_mode mode)
3990 if (mode != VOIDmode && mode != GET_MODE (op))
3993 switch (GET_CODE (op))
3999 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4007 mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4009 return GET_CODE (op) == MULT;
4013 div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4015 return GET_CODE (op) == DIV;
4019 arith_or_logical_operator (rtx op, enum machine_mode mode)
4021 return ((mode == VOIDmode || GET_MODE (op) == mode)
4022 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4023 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4026 /* Returns 1 if OP is memory operand with a displacement. */
4029 memory_displacement_operand (register rtx op, enum machine_mode mode)
4031 struct ix86_address parts;
4033 if (! memory_operand (op, mode))
4036 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4039 return parts.disp != NULL_RTX;
4042 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4043 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4045 ??? It seems likely that this will only work because cmpsi is an
4046 expander, and no actual insns use this. */
4049 cmpsi_operand (rtx op, enum machine_mode mode)
4051 if (nonimmediate_operand (op, mode))
4054 if (GET_CODE (op) == AND
4055 && GET_MODE (op) == SImode
4056 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4057 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4058 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4059 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4060 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4061 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4067 /* Returns 1 if OP is memory operand that can not be represented by the
4071 long_memory_operand (register rtx op, enum machine_mode mode)
4073 if (! memory_operand (op, mode))
4076 return memory_address_length (op) != 0;
4079 /* Return nonzero if the rtx is known aligned. */
4082 aligned_operand (rtx op, enum machine_mode mode)
4084 struct ix86_address parts;
4086 if (!general_operand (op, mode))
4089 /* Registers and immediate operands are always "aligned". */
4090 if (GET_CODE (op) != MEM)
4093 /* Don't even try to do any aligned optimizations with volatiles. */
4094 if (MEM_VOLATILE_P (op))
4099 /* Pushes and pops are only valid on the stack pointer. */
4100 if (GET_CODE (op) == PRE_DEC
4101 || GET_CODE (op) == POST_INC)
4104 /* Decode the address. */
4105 if (! ix86_decompose_address (op, &parts))
4108 if (parts.base && GET_CODE (parts.base) == SUBREG)
4109 parts.base = SUBREG_REG (parts.base);
4110 if (parts.index && GET_CODE (parts.index) == SUBREG)
4111 parts.index = SUBREG_REG (parts.index);
4113 /* Look for some component that isn't known to be aligned. */
4117 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4122 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4127 if (GET_CODE (parts.disp) != CONST_INT
4128 || (INTVAL (parts.disp) & 3) != 0)
4132 /* Didn't find one -- this must be an aligned address. */
4136 /* Initialize the table of extra 80387 mathematical constants. */
4139 init_ext_80387_constants (void)
4141 static const char * cst[5] =
4143 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4144 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4145 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4146 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4147 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4151 for (i = 0; i < 5; i++)
4153 real_from_string (&ext_80387_constants_table[i], cst[i]);
4154 /* Ensure each constant is rounded to XFmode precision. */
4155 real_convert (&ext_80387_constants_table[i],
4156 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode,
4157 &ext_80387_constants_table[i]);
4160 ext_80387_constants_init = 1;
4163 /* Return true if the constant is something that can be loaded with
4164 a special instruction. */
4167 standard_80387_constant_p (rtx x)
4169 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4172 if (x == CONST0_RTX (GET_MODE (x)))
4174 if (x == CONST1_RTX (GET_MODE (x)))
4177 /* For XFmode constants, try to find a special 80387 instruction on
4178 those CPUs that benefit from them. */
4179 if ((GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)
4180 && x86_ext_80387_constants & TUNEMASK)
4185 if (! ext_80387_constants_init)
4186 init_ext_80387_constants ();
4188 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4189 for (i = 0; i < 5; i++)
4190 if (real_identical (&r, &ext_80387_constants_table[i]))
4197 /* Return the opcode of the special instruction to be used to load
4201 standard_80387_constant_opcode (rtx x)
4203 switch (standard_80387_constant_p (x))
4223 /* Return the CONST_DOUBLE representing the 80387 constant that is
4224 loaded by the specified special instruction. The argument IDX
4225 matches the return value from standard_80387_constant_p. */
4228 standard_80387_constant_rtx (int idx)
4232 if (! ext_80387_constants_init)
4233 init_ext_80387_constants ();
4249 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4250 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode);
4253 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4256 standard_sse_constant_p (rtx x)
4258 if (x == const0_rtx)
4260 return (x == CONST0_RTX (GET_MODE (x)));
4263 /* Returns 1 if OP contains a symbol reference */
4266 symbolic_reference_mentioned_p (rtx op)
4268 register const char *fmt;
4271 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4274 fmt = GET_RTX_FORMAT (GET_CODE (op));
4275 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4281 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4282 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4286 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4293 /* Return 1 if it is appropriate to emit `ret' instructions in the
4294 body of a function. Do this only if the epilogue is simple, needing a
4295 couple of insns. Prior to reloading, we can't tell how many registers
4296 must be saved, so return 0 then. Return 0 if there is no frame
4297 marker to de-allocate.
4299 If NON_SAVING_SETJMP is defined and true, then it is not possible
4300 for the epilogue to be simple, so return 0. This is a special case
4301 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4302 until final, but jump_optimize may need to know sooner if a
4306 ix86_can_use_return_insn_p (void)
4308 struct ix86_frame frame;
4310 #ifdef NON_SAVING_SETJMP
4311 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4315 if (! reload_completed || frame_pointer_needed)
4318 /* Don't allow more than 32 pop, since that's all we can do
4319 with one instruction. */
4320 if (current_function_pops_args
4321 && current_function_args_size >= 32768)
4324 ix86_compute_frame_layout (&frame);
4325 return frame.to_allocate == 0 && frame.nregs == 0;
4328 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4330 x86_64_sign_extended_value (rtx value)
4332 switch (GET_CODE (value))
4334 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4335 to be at least 32 and this all acceptable constants are
4336 represented as CONST_INT. */
4338 if (HOST_BITS_PER_WIDE_INT == 32)
4342 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4343 return trunc_int_for_mode (val, SImode) == val;
4347 /* For certain code models, the symbolic references are known to fit.
4348 in CM_SMALL_PIC model we know it fits if it is local to the shared
4349 library. Don't count TLS SYMBOL_REFs here, since they should fit
4350 only if inside of UNSPEC handled below. */
4352 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4354 /* For certain code models, the code is near as well. */
4356 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4357 || ix86_cmodel == CM_KERNEL);
4359 /* We also may accept the offsetted memory references in certain special
4362 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4363 switch (XINT (XEXP (value, 0), 1))
4365 case UNSPEC_GOTPCREL:
4367 case UNSPEC_GOTNTPOFF:
4373 if (GET_CODE (XEXP (value, 0)) == PLUS)
4375 rtx op1 = XEXP (XEXP (value, 0), 0);
4376 rtx op2 = XEXP (XEXP (value, 0), 1);
4377 HOST_WIDE_INT offset;
4379 if (ix86_cmodel == CM_LARGE)
4381 if (GET_CODE (op2) != CONST_INT)
4383 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4384 switch (GET_CODE (op1))
4387 /* For CM_SMALL assume that latest object is 16MB before
4388 end of 31bits boundary. We may also accept pretty
4389 large negative constants knowing that all objects are
4390 in the positive half of address space. */
4391 if (ix86_cmodel == CM_SMALL
4392 && offset < 16*1024*1024
4393 && trunc_int_for_mode (offset, SImode) == offset)
4395 /* For CM_KERNEL we know that all object resist in the
4396 negative half of 32bits address space. We may not
4397 accept negative offsets, since they may be just off
4398 and we may accept pretty large positive ones. */
4399 if (ix86_cmodel == CM_KERNEL
4401 && trunc_int_for_mode (offset, SImode) == offset)
4405 /* These conditions are similar to SYMBOL_REF ones, just the
4406 constraints for code models differ. */
4407 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4408 && offset < 16*1024*1024
4409 && trunc_int_for_mode (offset, SImode) == offset)
4411 if (ix86_cmodel == CM_KERNEL
4413 && trunc_int_for_mode (offset, SImode) == offset)
4417 switch (XINT (op1, 1))
4422 && trunc_int_for_mode (offset, SImode) == offset)
4436 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4438 x86_64_zero_extended_value (rtx value)
4440 switch (GET_CODE (value))
4443 if (HOST_BITS_PER_WIDE_INT == 32)
4444 return (GET_MODE (value) == VOIDmode
4445 && !CONST_DOUBLE_HIGH (value));
4449 if (HOST_BITS_PER_WIDE_INT == 32)
4450 return INTVAL (value) >= 0;
4452 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4455 /* For certain code models, the symbolic references are known to fit. */
4457 return ix86_cmodel == CM_SMALL;
4459 /* For certain code models, the code is near as well. */
4461 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4463 /* We also may accept the offsetted memory references in certain special
4466 if (GET_CODE (XEXP (value, 0)) == PLUS)
4468 rtx op1 = XEXP (XEXP (value, 0), 0);
4469 rtx op2 = XEXP (XEXP (value, 0), 1);
4471 if (ix86_cmodel == CM_LARGE)
4473 switch (GET_CODE (op1))
4477 /* For small code model we may accept pretty large positive
4478 offsets, since one bit is available for free. Negative
4479 offsets are limited by the size of NULL pointer area
4480 specified by the ABI. */
4481 if (ix86_cmodel == CM_SMALL
4482 && GET_CODE (op2) == CONST_INT
4483 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4484 && (trunc_int_for_mode (INTVAL (op2), SImode)
4487 /* ??? For the kernel, we may accept adjustment of
4488 -0x10000000, since we know that it will just convert
4489 negative address space to positive, but perhaps this
4490 is not worthwhile. */
4493 /* These conditions are similar to SYMBOL_REF ones, just the
4494 constraints for code models differ. */
4495 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4496 && GET_CODE (op2) == CONST_INT
4497 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4498 && (trunc_int_for_mode (INTVAL (op2), SImode)
4512 /* Value should be nonzero if functions must have frame pointers.
4513 Zero means the frame pointer need not be set up (and parms may
4514 be accessed via the stack pointer) in functions that seem suitable. */
4517 ix86_frame_pointer_required (void)
4519 /* If we accessed previous frames, then the generated code expects
4520 to be able to access the saved ebp value in our frame. */
4521 if (cfun->machine->accesses_prev_frame)
4524 /* Several x86 os'es need a frame pointer for other reasons,
4525 usually pertaining to setjmp. */
4526 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4529 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4530 the frame pointer by default. Turn it back on now if we've not
4531 got a leaf function. */
4532 if (TARGET_OMIT_LEAF_FRAME_POINTER
4533 && (!current_function_is_leaf))
4536 if (current_function_profile)
4542 /* Record that the current function accesses previous call frames. */
4545 ix86_setup_frame_addresses (void)
4547 cfun->machine->accesses_prev_frame = 1;
4550 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4551 # define USE_HIDDEN_LINKONCE 1
4553 # define USE_HIDDEN_LINKONCE 0
4556 static int pic_labels_used;
4558 /* Fills in the label name that should be used for a pc thunk for
4559 the given register. */
4562 get_pc_thunk_name (char name[32], unsigned int regno)
4564 if (USE_HIDDEN_LINKONCE)
4565 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4567 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4571 /* This function generates code for -fpic that loads %ebx with
4572 the return address of the caller and then returns. */
4575 ix86_file_end (void)
4580 for (regno = 0; regno < 8; ++regno)
4584 if (! ((pic_labels_used >> regno) & 1))
4587 get_pc_thunk_name (name, regno);
4589 if (USE_HIDDEN_LINKONCE)
4593 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4595 TREE_PUBLIC (decl) = 1;
4596 TREE_STATIC (decl) = 1;
4597 DECL_ONE_ONLY (decl) = 1;
4599 (*targetm.asm_out.unique_section) (decl, 0);
4600 named_section (decl, NULL, 0);
4602 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4603 fputs ("\t.hidden\t", asm_out_file);
4604 assemble_name (asm_out_file, name);
4605 fputc ('\n', asm_out_file);
4606 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4611 ASM_OUTPUT_LABEL (asm_out_file, name);
4614 xops[0] = gen_rtx_REG (SImode, regno);
4615 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4616 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4617 output_asm_insn ("ret", xops);
4620 if (NEED_INDICATE_EXEC_STACK)
4621 file_end_indicate_exec_stack ();
4624 /* Emit code for the SET_GOT patterns. */
4627 output_set_got (rtx dest)
4632 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4634 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4636 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4639 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4641 output_asm_insn ("call\t%a2", xops);
4644 /* Output the "canonical" label name ("Lxx$pb") here too. This
4645 is what will be referred to by the Mach-O PIC subsystem. */
4646 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4648 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4649 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4652 output_asm_insn ("pop{l}\t%0", xops);
4657 get_pc_thunk_name (name, REGNO (dest));
4658 pic_labels_used |= 1 << REGNO (dest);
4660 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4661 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4662 output_asm_insn ("call\t%X2", xops);
4665 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4666 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4667 else if (!TARGET_MACHO)
4668 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4673 /* Generate an "push" pattern for input ARG. */
4678 return gen_rtx_SET (VOIDmode,
4680 gen_rtx_PRE_DEC (Pmode,
4681 stack_pointer_rtx)),
4685 /* Return >= 0 if there is an unused call-clobbered register available
4686 for the entire function. */
4689 ix86_select_alt_pic_regnum (void)
4691 if (current_function_is_leaf && !current_function_profile)
4694 for (i = 2; i >= 0; --i)
4695 if (!regs_ever_live[i])
4699 return INVALID_REGNUM;
4702 /* Return 1 if we need to save REGNO. */
4704 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4706 if (pic_offset_table_rtx
4707 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4708 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4709 || current_function_profile
4710 || current_function_calls_eh_return
4711 || current_function_uses_const_pool))
4713 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4718 if (current_function_calls_eh_return && maybe_eh_return)
4723 unsigned test = EH_RETURN_DATA_REGNO (i);
4724 if (test == INVALID_REGNUM)
4731 return (regs_ever_live[regno]
4732 && !call_used_regs[regno]
4733 && !fixed_regs[regno]
4734 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4737 /* Return number of registers to be saved on the stack. */
4740 ix86_nsaved_regs (void)
4745 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4746 if (ix86_save_reg (regno, true))
4751 /* Return the offset between two registers, one to be eliminated, and the other
4752 its replacement, at the start of a routine. */
4755 ix86_initial_elimination_offset (int from, int to)
4757 struct ix86_frame frame;
4758 ix86_compute_frame_layout (&frame);
4760 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4761 return frame.hard_frame_pointer_offset;
4762 else if (from == FRAME_POINTER_REGNUM
4763 && to == HARD_FRAME_POINTER_REGNUM)
4764 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4767 if (to != STACK_POINTER_REGNUM)
4769 else if (from == ARG_POINTER_REGNUM)
4770 return frame.stack_pointer_offset;
4771 else if (from != FRAME_POINTER_REGNUM)
4774 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4778 /* Fill structure ix86_frame about frame of currently computed function. */
4781 ix86_compute_frame_layout (struct ix86_frame *frame)
4783 HOST_WIDE_INT total_size;
4784 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4786 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4787 HOST_WIDE_INT size = get_frame_size ();
4789 frame->nregs = ix86_nsaved_regs ();
4792 /* During reload iteration the amount of registers saved can change.
4793 Recompute the value as needed. Do not recompute when amount of registers
4794 didn't change as reload does mutiple calls to the function and does not
4795 expect the decision to change within single iteration. */
4797 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4799 int count = frame->nregs;
4801 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4802 /* The fast prologue uses move instead of push to save registers. This
4803 is significantly longer, but also executes faster as modern hardware
4804 can execute the moves in parallel, but can't do that for push/pop.
4806 Be careful about choosing what prologue to emit: When function takes
4807 many instructions to execute we may use slow version as well as in
4808 case function is known to be outside hot spot (this is known with
4809 feedback only). Weight the size of function by number of registers
4810 to save as it is cheap to use one or two push instructions but very
4811 slow to use many of them. */
4813 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4814 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4815 || (flag_branch_probabilities
4816 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4817 cfun->machine->use_fast_prologue_epilogue = false;
4819 cfun->machine->use_fast_prologue_epilogue
4820 = !expensive_function_p (count);
4822 if (TARGET_PROLOGUE_USING_MOVE
4823 && cfun->machine->use_fast_prologue_epilogue)
4824 frame->save_regs_using_mov = true;
4826 frame->save_regs_using_mov = false;
4829 /* Skip return address and saved base pointer. */
4830 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4832 frame->hard_frame_pointer_offset = offset;
4834 /* Do some sanity checking of stack_alignment_needed and
4835 preferred_alignment, since i386 port is the only using those features
4836 that may break easily. */
4838 if (size && !stack_alignment_needed)
4840 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4842 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4844 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4847 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4848 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4850 /* Register save area */
4851 offset += frame->nregs * UNITS_PER_WORD;
4854 if (ix86_save_varrargs_registers)
4856 offset += X86_64_VARARGS_SIZE;
4857 frame->va_arg_size = X86_64_VARARGS_SIZE;
4860 frame->va_arg_size = 0;
4862 /* Align start of frame for local function. */
4863 frame->padding1 = ((offset + stack_alignment_needed - 1)
4864 & -stack_alignment_needed) - offset;
4866 offset += frame->padding1;
4868 /* Frame pointer points here. */
4869 frame->frame_pointer_offset = offset;
4873 /* Add outgoing arguments area. Can be skipped if we eliminated
4874 all the function calls as dead code. */
4875 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4877 offset += current_function_outgoing_args_size;
4878 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4881 frame->outgoing_arguments_size = 0;
4883 /* Align stack boundary. Only needed if we're calling another function
4885 if (!current_function_is_leaf || current_function_calls_alloca)
4886 frame->padding2 = ((offset + preferred_alignment - 1)
4887 & -preferred_alignment) - offset;
4889 frame->padding2 = 0;
4891 offset += frame->padding2;
4893 /* We've reached end of stack frame. */
4894 frame->stack_pointer_offset = offset;
4896 /* Size prologue needs to allocate. */
4897 frame->to_allocate =
4898 (size + frame->padding1 + frame->padding2
4899 + frame->outgoing_arguments_size + frame->va_arg_size);
4901 if (!frame->to_allocate && frame->nregs <= 1)
4902 frame->save_regs_using_mov = false;
4904 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4905 && current_function_is_leaf)
4907 frame->red_zone_size = frame->to_allocate;
4908 if (frame->save_regs_using_mov)
4909 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4910 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4911 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4914 frame->red_zone_size = 0;
4915 frame->to_allocate -= frame->red_zone_size;
4916 frame->stack_pointer_offset -= frame->red_zone_size;
4918 fprintf (stderr, "nregs: %i\n", frame->nregs);
4919 fprintf (stderr, "size: %i\n", size);
4920 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4921 fprintf (stderr, "padding1: %i\n", frame->padding1);
4922 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4923 fprintf (stderr, "padding2: %i\n", frame->padding2);
4924 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4925 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4926 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4927 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4928 frame->hard_frame_pointer_offset);
4929 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4933 /* Emit code to save registers in the prologue. */
4936 ix86_emit_save_regs (void)
4941 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4942 if (ix86_save_reg (regno, true))
4944 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4945 RTX_FRAME_RELATED_P (insn) = 1;
4949 /* Emit code to save registers using MOV insns. First register
4950 is restored from POINTER + OFFSET. */
4952 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4957 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4958 if (ix86_save_reg (regno, true))
4960 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4962 gen_rtx_REG (Pmode, regno));
4963 RTX_FRAME_RELATED_P (insn) = 1;
4964 offset += UNITS_PER_WORD;
4968 /* Expand the prologue into a bunch of separate insns. */
4971 ix86_expand_prologue (void)
4975 struct ix86_frame frame;
4976 HOST_WIDE_INT allocate;
4978 ix86_compute_frame_layout (&frame);
4980 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4981 slower on all targets. Also sdb doesn't like it. */
4983 if (frame_pointer_needed)
4985 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4986 RTX_FRAME_RELATED_P (insn) = 1;
4988 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4989 RTX_FRAME_RELATED_P (insn) = 1;
4992 allocate = frame.to_allocate;
4994 if (!frame.save_regs_using_mov)
4995 ix86_emit_save_regs ();
4997 allocate += frame.nregs * UNITS_PER_WORD;
4999 /* When using red zone we may start register saving before allocating
5000 the stack frame saving one cycle of the prologue. */
5001 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5002 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5003 : stack_pointer_rtx,
5004 -frame.nregs * UNITS_PER_WORD);
5008 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5010 insn = emit_insn (gen_pro_epilogue_adjust_stack
5011 (stack_pointer_rtx, stack_pointer_rtx,
5012 GEN_INT (-allocate)));
5013 RTX_FRAME_RELATED_P (insn) = 1;
5017 /* ??? Is this only valid for Win32? */
5024 arg0 = gen_rtx_REG (SImode, 0);
5025 emit_move_insn (arg0, GEN_INT (allocate));
5027 sym = gen_rtx_MEM (FUNCTION_MODE,
5028 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5029 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5031 CALL_INSN_FUNCTION_USAGE (insn)
5032 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5033 CALL_INSN_FUNCTION_USAGE (insn));
5035 /* Don't allow scheduling pass to move insns across __alloca
5037 emit_insn (gen_blockage (const0_rtx));
5039 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5041 if (!frame_pointer_needed || !frame.to_allocate)
5042 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5044 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5045 -frame.nregs * UNITS_PER_WORD);
5048 pic_reg_used = false;
5049 if (pic_offset_table_rtx
5050 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5051 || current_function_profile))
5053 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5055 if (alt_pic_reg_used != INVALID_REGNUM)
5056 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5058 pic_reg_used = true;
5063 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5065 /* Even with accurate pre-reload life analysis, we can wind up
5066 deleting all references to the pic register after reload.
5067 Consider if cross-jumping unifies two sides of a branch
5068 controlled by a comparison vs the only read from a global.
5069 In which case, allow the set_got to be deleted, though we're
5070 too late to do anything about the ebx save in the prologue. */
5071 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5074 /* Prevent function calls from be scheduled before the call to mcount.
5075 In the pic_reg_used case, make sure that the got load isn't deleted. */
5076 if (current_function_profile)
5077 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5080 /* Emit code to restore saved registers using MOV insns. First register
5081 is restored from POINTER + OFFSET. */
5083 ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
5087 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5088 if (ix86_save_reg (regno, maybe_eh_return))
5090 emit_move_insn (gen_rtx_REG (Pmode, regno),
5091 adjust_address (gen_rtx_MEM (Pmode, pointer),
5093 offset += UNITS_PER_WORD;
5097 /* Restore function stack, frame, and registers. */
5100 ix86_expand_epilogue (int style)
5103 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5104 struct ix86_frame frame;
5105 HOST_WIDE_INT offset;
5107 ix86_compute_frame_layout (&frame);
5109 /* Calculate start of saved registers relative to ebp. Special care
5110 must be taken for the normal return case of a function using
5111 eh_return: the eax and edx registers are marked as saved, but not
5112 restored along this path. */
5113 offset = frame.nregs;
5114 if (current_function_calls_eh_return && style != 2)
5116 offset *= -UNITS_PER_WORD;
5118 /* If we're only restoring one register and sp is not valid then
5119 using a move instruction to restore the register since it's
5120 less work than reloading sp and popping the register.
5122 The default code result in stack adjustment using add/lea instruction,
5123 while this code results in LEAVE instruction (or discrete equivalent),
5124 so it is profitable in some other cases as well. Especially when there
5125 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5126 and there is exactly one register to pop. This heuristic may need some
5127 tuning in future. */
5128 if ((!sp_valid && frame.nregs <= 1)
5129 || (TARGET_EPILOGUE_USING_MOVE
5130 && cfun->machine->use_fast_prologue_epilogue
5131 && (frame.nregs > 1 || frame.to_allocate))
5132 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5133 || (frame_pointer_needed && TARGET_USE_LEAVE
5134 && cfun->machine->use_fast_prologue_epilogue
5135 && frame.nregs == 1)
5136 || current_function_calls_eh_return)
5138 /* Restore registers. We can use ebp or esp to address the memory
5139 locations. If both are available, default to ebp, since offsets
5140 are known to be small. Only exception is esp pointing directly to the
5141 end of block of saved registers, where we may simplify addressing
5144 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5145 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5146 frame.to_allocate, style == 2);
5148 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5149 offset, style == 2);
5151 /* eh_return epilogues need %ecx added to the stack pointer. */
5154 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5156 if (frame_pointer_needed)
5158 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5159 tmp = plus_constant (tmp, UNITS_PER_WORD);
5160 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5162 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5163 emit_move_insn (hard_frame_pointer_rtx, tmp);
5165 emit_insn (gen_pro_epilogue_adjust_stack
5166 (stack_pointer_rtx, sa, const0_rtx));
5170 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5171 tmp = plus_constant (tmp, (frame.to_allocate
5172 + frame.nregs * UNITS_PER_WORD));
5173 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5176 else if (!frame_pointer_needed)
5177 emit_insn (gen_pro_epilogue_adjust_stack
5178 (stack_pointer_rtx, stack_pointer_rtx,
5179 GEN_INT (frame.to_allocate
5180 + frame.nregs * UNITS_PER_WORD)));
5181 /* If not an i386, mov & pop is faster than "leave". */
5182 else if (TARGET_USE_LEAVE || optimize_size
5183 || !cfun->machine->use_fast_prologue_epilogue)
5184 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5187 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5188 hard_frame_pointer_rtx,
5191 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5193 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5198 /* First step is to deallocate the stack frame so that we can
5199 pop the registers. */
5202 if (!frame_pointer_needed)
5204 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5205 hard_frame_pointer_rtx,
5208 else if (frame.to_allocate)
5209 emit_insn (gen_pro_epilogue_adjust_stack
5210 (stack_pointer_rtx, stack_pointer_rtx,
5211 GEN_INT (frame.to_allocate)));
5213 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5214 if (ix86_save_reg (regno, false))
5217 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5219 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5221 if (frame_pointer_needed)
5223 /* Leave results in shorter dependency chains on CPUs that are
5224 able to grok it fast. */
5225 if (TARGET_USE_LEAVE)
5226 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5227 else if (TARGET_64BIT)
5228 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5230 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5234 /* Sibcall epilogues don't want a return instruction. */
5238 if (current_function_pops_args && current_function_args_size)
5240 rtx popc = GEN_INT (current_function_pops_args);
5242 /* i386 can only pop 64K bytes. If asked to pop more, pop
5243 return address, do explicit add, and jump indirectly to the
5246 if (current_function_pops_args >= 65536)
5248 rtx ecx = gen_rtx_REG (SImode, 2);
5250 /* There are is no "pascal" calling convention in 64bit ABI. */
5254 emit_insn (gen_popsi1 (ecx));
5255 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5256 emit_jump_insn (gen_return_indirect_internal (ecx));
5259 emit_jump_insn (gen_return_pop_internal (popc));
5262 emit_jump_insn (gen_return_internal ());
5265 /* Reset from the function's potential modifications. */
5268 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5269 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5271 if (pic_offset_table_rtx)
5272 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5275 /* Extract the parts of an RTL expression that is a valid memory address
5276 for an instruction. Return 0 if the structure of the address is
5277 grossly off. Return -1 if the address contains ASHIFT, so it is not
5278 strictly valid, but still used for computing length of lea instruction. */
5281 ix86_decompose_address (register rtx addr, struct ix86_address *out)
5283 rtx base = NULL_RTX;
5284 rtx index = NULL_RTX;
5285 rtx disp = NULL_RTX;
5286 HOST_WIDE_INT scale = 1;
5287 rtx scale_rtx = NULL_RTX;
5289 enum ix86_address_seg seg = SEG_DEFAULT;
5291 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5293 else if (GET_CODE (addr) == PLUS)
5303 addends[n++] = XEXP (op, 1);
5306 while (GET_CODE (op) == PLUS);
5311 for (i = n; i >= 0; --i)
5314 switch (GET_CODE (op))
5319 index = XEXP (op, 0);
5320 scale_rtx = XEXP (op, 1);
5324 if (XINT (op, 1) == UNSPEC_TP
5325 && TARGET_TLS_DIRECT_SEG_REFS
5326 && seg == SEG_DEFAULT)
5327 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5356 else if (GET_CODE (addr) == MULT)
5358 index = XEXP (addr, 0); /* index*scale */
5359 scale_rtx = XEXP (addr, 1);
5361 else if (GET_CODE (addr) == ASHIFT)
5365 /* We're called for lea too, which implements ashift on occasion. */
5366 index = XEXP (addr, 0);
5367 tmp = XEXP (addr, 1);
5368 if (GET_CODE (tmp) != CONST_INT)
5370 scale = INTVAL (tmp);
5371 if ((unsigned HOST_WIDE_INT) scale > 3)
5377 disp = addr; /* displacement */
5379 /* Extract the integral value of scale. */
5382 if (GET_CODE (scale_rtx) != CONST_INT)
5384 scale = INTVAL (scale_rtx);
5387 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5388 if (base && index && scale == 1
5389 && (index == arg_pointer_rtx
5390 || index == frame_pointer_rtx
5391 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5398 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5399 if ((base == hard_frame_pointer_rtx
5400 || base == frame_pointer_rtx
5401 || base == arg_pointer_rtx) && !disp)
5404 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5405 Avoid this by transforming to [%esi+0]. */
5406 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5407 && base && !index && !disp
5409 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5412 /* Special case: encode reg+reg instead of reg*2. */
5413 if (!base && index && scale && scale == 2)
5414 base = index, scale = 1;
5416 /* Special case: scaling cannot be encoded without base or displacement. */
5417 if (!base && !disp && index && scale != 1)
5429 /* Return cost of the memory address x.
5430 For i386, it is better to use a complex address than let gcc copy
5431 the address into a reg and make a new pseudo. But not if the address
5432 requires to two regs - that would mean more pseudos with longer
5435 ix86_address_cost (rtx x)
5437 struct ix86_address parts;
5440 if (!ix86_decompose_address (x, &parts))
5443 if (parts.base && GET_CODE (parts.base) == SUBREG)
5444 parts.base = SUBREG_REG (parts.base);
5445 if (parts.index && GET_CODE (parts.index) == SUBREG)
5446 parts.index = SUBREG_REG (parts.index);
5448 /* More complex memory references are better. */
5449 if (parts.disp && parts.disp != const0_rtx)
5451 if (parts.seg != SEG_DEFAULT)
5454 /* Attempt to minimize number of registers in the address. */
5456 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5458 && (!REG_P (parts.index)
5459 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5463 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5465 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5466 && parts.base != parts.index)
5469 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5470 since it's predecode logic can't detect the length of instructions
5471 and it degenerates to vector decoded. Increase cost of such
5472 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5473 to split such addresses or even refuse such addresses at all.
5475 Following addressing modes are affected:
5480 The first and last case may be avoidable by explicitly coding the zero in
5481 memory address, but I don't have AMD-K6 machine handy to check this
5485 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5486 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5487 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5493 /* If X is a machine specific address (i.e. a symbol or label being
5494 referenced as a displacement from the GOT implemented using an
5495 UNSPEC), then return the base term. Otherwise return X. */
5498 ix86_find_base_term (rtx x)
5504 if (GET_CODE (x) != CONST)
5507 if (GET_CODE (term) == PLUS
5508 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5509 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5510 term = XEXP (term, 0);
5511 if (GET_CODE (term) != UNSPEC
5512 || XINT (term, 1) != UNSPEC_GOTPCREL)
5515 term = XVECEXP (term, 0, 0);
5517 if (GET_CODE (term) != SYMBOL_REF
5518 && GET_CODE (term) != LABEL_REF)
5524 term = ix86_delegitimize_address (x);
5526 if (GET_CODE (term) != SYMBOL_REF
5527 && GET_CODE (term) != LABEL_REF)
5533 /* Determine if a given RTX is a valid constant. We already know this
5534 satisfies CONSTANT_P. */
5537 legitimate_constant_p (rtx x)
5541 switch (GET_CODE (x))
5544 /* TLS symbols are not constant. */
5545 if (tls_symbolic_operand (x, Pmode))
5550 inner = XEXP (x, 0);
5552 /* Offsets of TLS symbols are never valid.
5553 Discourage CSE from creating them. */
5554 if (GET_CODE (inner) == PLUS
5555 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5558 if (GET_CODE (inner) == PLUS)
5560 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5562 inner = XEXP (inner, 0);
5565 /* Only some unspecs are valid as "constants". */
5566 if (GET_CODE (inner) == UNSPEC)
5567 switch (XINT (inner, 1))
5571 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5573 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5583 /* Otherwise we handle everything else in the move patterns. */
5587 /* Determine if it's legal to put X into the constant pool. This
5588 is not possible for the address of thread-local symbols, which
5589 is checked above. */
5592 ix86_cannot_force_const_mem (rtx x)
5594 return !legitimate_constant_p (x);
5597 /* Determine if a given RTX is a valid constant address. */
5600 constant_address_p (rtx x)
5602 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5605 /* Nonzero if the constant value X is a legitimate general operand
5606 when generating PIC code. It is given that flag_pic is on and
5607 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5610 legitimate_pic_operand_p (rtx x)
5614 switch (GET_CODE (x))
5617 inner = XEXP (x, 0);
5619 /* Only some unspecs are valid as "constants". */
5620 if (GET_CODE (inner) == UNSPEC)
5621 switch (XINT (inner, 1))
5624 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5632 return legitimate_pic_address_disp_p (x);
5639 /* Determine if a given CONST RTX is a valid memory displacement
5643 legitimate_pic_address_disp_p (register rtx disp)
5647 /* In 64bit mode we can allow direct addresses of symbols and labels
5648 when they are not dynamic symbols. */
5651 /* TLS references should always be enclosed in UNSPEC. */
5652 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5654 if (GET_CODE (disp) == SYMBOL_REF
5655 && ix86_cmodel == CM_SMALL_PIC
5656 && SYMBOL_REF_LOCAL_P (disp))
5658 if (GET_CODE (disp) == LABEL_REF)
5660 if (GET_CODE (disp) == CONST
5661 && GET_CODE (XEXP (disp, 0)) == PLUS
5662 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5663 && ix86_cmodel == CM_SMALL_PIC
5664 && SYMBOL_REF_LOCAL_P (XEXP (XEXP (disp, 0), 0)))
5665 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5666 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5667 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5668 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5671 if (GET_CODE (disp) != CONST)
5673 disp = XEXP (disp, 0);
5677 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5678 of GOT tables. We should not need these anyway. */
5679 if (GET_CODE (disp) != UNSPEC
5680 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5683 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5684 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5690 if (GET_CODE (disp) == PLUS)
5692 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5694 disp = XEXP (disp, 0);
5698 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5699 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5701 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5702 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5703 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5705 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5706 if (strstr (sym_name, "$pb") != 0)
5711 if (GET_CODE (disp) != UNSPEC)
5714 switch (XINT (disp, 1))
5719 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5721 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5722 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5723 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5725 case UNSPEC_GOTTPOFF:
5726 case UNSPEC_GOTNTPOFF:
5727 case UNSPEC_INDNTPOFF:
5730 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5732 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5734 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5740 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5741 memory address for an instruction. The MODE argument is the machine mode
5742 for the MEM expression that wants to use this address.
5744 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5745 convert common non-canonical forms to canonical form so that they will
5749 legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
5751 struct ix86_address parts;
5752 rtx base, index, disp;
5753 HOST_WIDE_INT scale;
5754 const char *reason = NULL;
5755 rtx reason_rtx = NULL_RTX;
5757 if (TARGET_DEBUG_ADDR)
5760 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5761 GET_MODE_NAME (mode), strict);
5765 if (ix86_decompose_address (addr, &parts) <= 0)
5767 reason = "decomposition failed";
5772 index = parts.index;
5774 scale = parts.scale;
5776 /* Validate base register.
5778 Don't allow SUBREG's here, it can lead to spill failures when the base
5779 is one word out of a two word structure, which is represented internally
5787 if (GET_CODE (base) == SUBREG)
5788 reg = SUBREG_REG (base);
5792 if (GET_CODE (reg) != REG)
5794 reason = "base is not a register";
5798 if (GET_MODE (base) != Pmode)
5800 reason = "base is not in Pmode";
5804 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5805 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5807 reason = "base is not valid";
5812 /* Validate index register.
5814 Don't allow SUBREG's here, it can lead to spill failures when the index
5815 is one word out of a two word structure, which is represented internally
5823 if (GET_CODE (index) == SUBREG)
5824 reg = SUBREG_REG (index);
5828 if (GET_CODE (reg) != REG)
5830 reason = "index is not a register";
5834 if (GET_MODE (index) != Pmode)
5836 reason = "index is not in Pmode";
5840 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5841 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5843 reason = "index is not valid";
5848 /* Validate scale factor. */
5851 reason_rtx = GEN_INT (scale);
5854 reason = "scale without index";
5858 if (scale != 2 && scale != 4 && scale != 8)
5860 reason = "scale is not a valid multiplier";
5865 /* Validate displacement. */
5870 if (GET_CODE (disp) == CONST
5871 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5872 switch (XINT (XEXP (disp, 0), 1))
5876 case UNSPEC_GOTPCREL:
5879 goto is_legitimate_pic;
5881 case UNSPEC_GOTTPOFF:
5882 case UNSPEC_GOTNTPOFF:
5883 case UNSPEC_INDNTPOFF:
5889 reason = "invalid address unspec";
5893 else if (flag_pic && (SYMBOLIC_CONST (disp)
5895 && !machopic_operand_p (disp)
5900 if (TARGET_64BIT && (index || base))
5902 /* foo@dtpoff(%rX) is ok. */
5903 if (GET_CODE (disp) != CONST
5904 || GET_CODE (XEXP (disp, 0)) != PLUS
5905 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5906 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5907 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5908 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5910 reason = "non-constant pic memory reference";
5914 else if (! legitimate_pic_address_disp_p (disp))
5916 reason = "displacement is an invalid pic construct";
5920 /* This code used to verify that a symbolic pic displacement
5921 includes the pic_offset_table_rtx register.
5923 While this is good idea, unfortunately these constructs may
5924 be created by "adds using lea" optimization for incorrect
5933 This code is nonsensical, but results in addressing
5934 GOT table with pic_offset_table_rtx base. We can't
5935 just refuse it easily, since it gets matched by
5936 "addsi3" pattern, that later gets split to lea in the
5937 case output register differs from input. While this
5938 can be handled by separate addsi pattern for this case
5939 that never results in lea, this seems to be easier and
5940 correct fix for crash to disable this test. */
5942 else if (GET_CODE (disp) != LABEL_REF
5943 && GET_CODE (disp) != CONST_INT
5944 && (GET_CODE (disp) != CONST
5945 || !legitimate_constant_p (disp))
5946 && (GET_CODE (disp) != SYMBOL_REF
5947 || !legitimate_constant_p (disp)))
5949 reason = "displacement is not constant";
5952 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5954 reason = "displacement is out of range";
5959 /* Everything looks valid. */
5960 if (TARGET_DEBUG_ADDR)
5961 fprintf (stderr, "Success.\n");
5965 if (TARGET_DEBUG_ADDR)
5967 fprintf (stderr, "Error: %s\n", reason);
5968 debug_rtx (reason_rtx);
5973 /* Return an unique alias set for the GOT. */
5975 static HOST_WIDE_INT
5976 ix86_GOT_alias_set (void)
5978 static HOST_WIDE_INT set = -1;
5980 set = new_alias_set ();
5984 /* Return a legitimate reference for ORIG (an address) using the
5985 register REG. If REG is 0, a new pseudo is generated.
5987 There are two types of references that must be handled:
5989 1. Global data references must load the address from the GOT, via
5990 the PIC reg. An insn is emitted to do this load, and the reg is
5993 2. Static data references, constant pool addresses, and code labels
5994 compute the address as an offset from the GOT, whose base is in
5995 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5996 differentiate them from global data objects. The returned
5997 address is the PIC reg + an unspec constant.
5999 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6000 reg also appears in the address. */
6003 legitimize_pic_address (rtx orig, rtx reg)
6011 reg = gen_reg_rtx (Pmode);
6012 /* Use the generic Mach-O PIC machinery. */
6013 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6016 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6018 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6020 /* This symbol may be referenced via a displacement from the PIC
6021 base address (@GOTOFF). */
6023 if (reload_in_progress)
6024 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6025 if (GET_CODE (addr) == CONST)
6026 addr = XEXP (addr, 0);
6027 if (GET_CODE (addr) == PLUS)
6029 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6030 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6033 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6034 new = gen_rtx_CONST (Pmode, new);
6035 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6039 emit_move_insn (reg, new);
6043 else if (GET_CODE (addr) == SYMBOL_REF)
6047 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6048 new = gen_rtx_CONST (Pmode, new);
6049 new = gen_rtx_MEM (Pmode, new);
6050 RTX_UNCHANGING_P (new) = 1;
6051 set_mem_alias_set (new, ix86_GOT_alias_set ());
6054 reg = gen_reg_rtx (Pmode);
6055 /* Use directly gen_movsi, otherwise the address is loaded
6056 into register for CSE. We don't want to CSE this addresses,
6057 instead we CSE addresses from the GOT table, so skip this. */
6058 emit_insn (gen_movsi (reg, new));
6063 /* This symbol must be referenced via a load from the
6064 Global Offset Table (@GOT). */
6066 if (reload_in_progress)
6067 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6068 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6069 new = gen_rtx_CONST (Pmode, new);
6070 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6071 new = gen_rtx_MEM (Pmode, new);
6072 RTX_UNCHANGING_P (new) = 1;
6073 set_mem_alias_set (new, ix86_GOT_alias_set ());
6076 reg = gen_reg_rtx (Pmode);
6077 emit_move_insn (reg, new);
6083 if (GET_CODE (addr) == CONST)
6085 addr = XEXP (addr, 0);
6087 /* We must match stuff we generate before. Assume the only
6088 unspecs that can get here are ours. Not that we could do
6089 anything with them anyway... */
6090 if (GET_CODE (addr) == UNSPEC
6091 || (GET_CODE (addr) == PLUS
6092 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6094 if (GET_CODE (addr) != PLUS)
6097 if (GET_CODE (addr) == PLUS)
6099 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6101 /* Check first to see if this is a constant offset from a @GOTOFF
6102 symbol reference. */
6103 if (local_symbolic_operand (op0, Pmode)
6104 && GET_CODE (op1) == CONST_INT)
6108 if (reload_in_progress)
6109 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6110 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6112 new = gen_rtx_PLUS (Pmode, new, op1);
6113 new = gen_rtx_CONST (Pmode, new);
6114 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6118 emit_move_insn (reg, new);
6124 if (INTVAL (op1) < -16*1024*1024
6125 || INTVAL (op1) >= 16*1024*1024)
6126 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6131 base = legitimize_pic_address (XEXP (addr, 0), reg);
6132 new = legitimize_pic_address (XEXP (addr, 1),
6133 base == reg ? NULL_RTX : reg);
6135 if (GET_CODE (new) == CONST_INT)
6136 new = plus_constant (base, INTVAL (new));
6139 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6141 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6142 new = XEXP (new, 1);
6144 new = gen_rtx_PLUS (Pmode, base, new);
6152 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6155 get_thread_pointer (int to_reg)
6159 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6163 reg = gen_reg_rtx (Pmode);
6164 insn = gen_rtx_SET (VOIDmode, reg, tp);
6165 insn = emit_insn (insn);
6170 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6171 false if we expect this to be used for a memory address and true if
6172 we expect to load the address into a register. */
6175 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6177 rtx dest, base, off, pic;
6182 case TLS_MODEL_GLOBAL_DYNAMIC:
6183 dest = gen_reg_rtx (Pmode);
6186 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6189 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6190 insns = get_insns ();
6193 emit_libcall_block (insns, dest, rax, x);
6196 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6199 case TLS_MODEL_LOCAL_DYNAMIC:
6200 base = gen_reg_rtx (Pmode);
6203 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6206 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6207 insns = get_insns ();
6210 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6211 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6212 emit_libcall_block (insns, base, rax, note);
6215 emit_insn (gen_tls_local_dynamic_base_32 (base));
6217 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6218 off = gen_rtx_CONST (Pmode, off);
6220 return gen_rtx_PLUS (Pmode, base, off);
6222 case TLS_MODEL_INITIAL_EXEC:
6226 type = UNSPEC_GOTNTPOFF;
6230 if (reload_in_progress)
6231 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6232 pic = pic_offset_table_rtx;
6233 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6235 else if (!TARGET_GNU_TLS)
6237 pic = gen_reg_rtx (Pmode);
6238 emit_insn (gen_set_got (pic));
6239 type = UNSPEC_GOTTPOFF;
6244 type = UNSPEC_INDNTPOFF;
6247 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6248 off = gen_rtx_CONST (Pmode, off);
6250 off = gen_rtx_PLUS (Pmode, pic, off);
6251 off = gen_rtx_MEM (Pmode, off);
6252 RTX_UNCHANGING_P (off) = 1;
6253 set_mem_alias_set (off, ix86_GOT_alias_set ());
6255 if (TARGET_64BIT || TARGET_GNU_TLS)
6257 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6258 off = force_reg (Pmode, off);
6259 return gen_rtx_PLUS (Pmode, base, off);
6263 base = get_thread_pointer (true);
6264 dest = gen_reg_rtx (Pmode);
6265 emit_insn (gen_subsi3 (dest, base, off));
6269 case TLS_MODEL_LOCAL_EXEC:
6270 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6271 (TARGET_64BIT || TARGET_GNU_TLS)
6272 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6273 off = gen_rtx_CONST (Pmode, off);
6275 if (TARGET_64BIT || TARGET_GNU_TLS)
6277 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6278 return gen_rtx_PLUS (Pmode, base, off);
6282 base = get_thread_pointer (true);
6283 dest = gen_reg_rtx (Pmode);
6284 emit_insn (gen_subsi3 (dest, base, off));
6295 /* Try machine-dependent ways of modifying an illegitimate address
6296 to be legitimate. If we find one, return the new, valid address.
6297 This macro is used in only one place: `memory_address' in explow.c.
6299 OLDX is the address as it was before break_out_memory_refs was called.
6300 In some cases it is useful to look at this to decide what needs to be done.
6302 MODE and WIN are passed so that this macro can use
6303 GO_IF_LEGITIMATE_ADDRESS.
6305 It is always safe for this macro to do nothing. It exists to recognize
6306 opportunities to optimize the output.
6308 For the 80386, we handle X+REG by loading X into a register R and
6309 using R+REG. R will go in a general reg and indexing will be used.
6310 However, if REG is a broken-out memory address or multiplication,
6311 nothing needs to be done because REG can certainly go in a general reg.
6313 When -fpic is used, special handling is needed for symbolic references.
6314 See comments by legitimize_pic_address in i386.c for details. */
6317 legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
6318 enum machine_mode mode)
6323 if (TARGET_DEBUG_ADDR)
6325 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6326 GET_MODE_NAME (mode));
6330 log = tls_symbolic_operand (x, mode);
6332 return legitimize_tls_address (x, log, false);
6334 if (flag_pic && SYMBOLIC_CONST (x))
6335 return legitimize_pic_address (x, 0);
6337 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6338 if (GET_CODE (x) == ASHIFT
6339 && GET_CODE (XEXP (x, 1)) == CONST_INT
6340 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6343 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6344 GEN_INT (1 << log));
6347 if (GET_CODE (x) == PLUS)
6349 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6351 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6352 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6353 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6356 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6357 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6358 GEN_INT (1 << log));
6361 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6362 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6363 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6366 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6367 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6368 GEN_INT (1 << log));
6371 /* Put multiply first if it isn't already. */
6372 if (GET_CODE (XEXP (x, 1)) == MULT)
6374 rtx tmp = XEXP (x, 0);
6375 XEXP (x, 0) = XEXP (x, 1);
6380 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6381 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6382 created by virtual register instantiation, register elimination, and
6383 similar optimizations. */
6384 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6387 x = gen_rtx_PLUS (Pmode,
6388 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6389 XEXP (XEXP (x, 1), 0)),
6390 XEXP (XEXP (x, 1), 1));
6394 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6395 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6396 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6397 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6398 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6399 && CONSTANT_P (XEXP (x, 1)))
6402 rtx other = NULL_RTX;
6404 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6406 constant = XEXP (x, 1);
6407 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6409 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6411 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6412 other = XEXP (x, 1);
6420 x = gen_rtx_PLUS (Pmode,
6421 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6422 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6423 plus_constant (other, INTVAL (constant)));
6427 if (changed && legitimate_address_p (mode, x, FALSE))
6430 if (GET_CODE (XEXP (x, 0)) == MULT)
6433 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6436 if (GET_CODE (XEXP (x, 1)) == MULT)
6439 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6443 && GET_CODE (XEXP (x, 1)) == REG
6444 && GET_CODE (XEXP (x, 0)) == REG)
6447 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6450 x = legitimize_pic_address (x, 0);
6453 if (changed && legitimate_address_p (mode, x, FALSE))
6456 if (GET_CODE (XEXP (x, 0)) == REG)
6458 register rtx temp = gen_reg_rtx (Pmode);
6459 register rtx val = force_operand (XEXP (x, 1), temp);
6461 emit_move_insn (temp, val);
6467 else if (GET_CODE (XEXP (x, 1)) == REG)
6469 register rtx temp = gen_reg_rtx (Pmode);
6470 register rtx val = force_operand (XEXP (x, 0), temp);
6472 emit_move_insn (temp, val);
6482 /* Print an integer constant expression in assembler syntax. Addition
6483 and subtraction are the only arithmetic that may appear in these
6484 expressions. FILE is the stdio stream to write to, X is the rtx, and
6485 CODE is the operand print code from the output string. */
6488 output_pic_addr_const (FILE *file, rtx x, int code)
6492 switch (GET_CODE (x))
6502 assemble_name (file, XSTR (x, 0));
6503 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6504 fputs ("@PLT", file);
6511 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6512 assemble_name (asm_out_file, buf);
6516 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6520 /* This used to output parentheses around the expression,
6521 but that does not work on the 386 (either ATT or BSD assembler). */
6522 output_pic_addr_const (file, XEXP (x, 0), code);
6526 if (GET_MODE (x) == VOIDmode)
6528 /* We can use %d if the number is <32 bits and positive. */
6529 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6530 fprintf (file, "0x%lx%08lx",
6531 (unsigned long) CONST_DOUBLE_HIGH (x),
6532 (unsigned long) CONST_DOUBLE_LOW (x));
6534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6537 /* We can't handle floating point constants;
6538 PRINT_OPERAND must handle them. */
6539 output_operand_lossage ("floating constant misused");
6543 /* Some assemblers need integer constants to appear first. */
6544 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6546 output_pic_addr_const (file, XEXP (x, 0), code);
6548 output_pic_addr_const (file, XEXP (x, 1), code);
6550 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6552 output_pic_addr_const (file, XEXP (x, 1), code);
6554 output_pic_addr_const (file, XEXP (x, 0), code);
6562 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6563 output_pic_addr_const (file, XEXP (x, 0), code);
6565 output_pic_addr_const (file, XEXP (x, 1), code);
6567 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6571 if (XVECLEN (x, 0) != 1)
6573 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6574 switch (XINT (x, 1))
6577 fputs ("@GOT", file);
6580 fputs ("@GOTOFF", file);
6582 case UNSPEC_GOTPCREL:
6583 fputs ("@GOTPCREL(%rip)", file);
6585 case UNSPEC_GOTTPOFF:
6586 /* FIXME: This might be @TPOFF in Sun ld too. */
6587 fputs ("@GOTTPOFF", file);
6590 fputs ("@TPOFF", file);
6594 fputs ("@TPOFF", file);
6596 fputs ("@NTPOFF", file);
6599 fputs ("@DTPOFF", file);
6601 case UNSPEC_GOTNTPOFF:
6603 fputs ("@GOTTPOFF(%rip)", file);
6605 fputs ("@GOTNTPOFF", file);
6607 case UNSPEC_INDNTPOFF:
6608 fputs ("@INDNTPOFF", file);
6611 output_operand_lossage ("invalid UNSPEC as operand");
6617 output_operand_lossage ("invalid expression as operand");
6621 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6622 We need to handle our special PIC relocations. */
6625 i386_dwarf_output_addr_const (FILE *file, rtx x)
6628 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6632 fprintf (file, "%s", ASM_LONG);
6635 output_pic_addr_const (file, x, '\0');
6637 output_addr_const (file, x);
6641 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6642 We need to emit DTP-relative relocations. */
6645 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6647 fputs (ASM_LONG, file);
6648 output_addr_const (file, x);
6649 fputs ("@DTPOFF", file);
6655 fputs (", 0", file);
6662 /* In the name of slightly smaller debug output, and to cater to
6663 general assembler losage, recognize PIC+GOTOFF and turn it back
6664 into a direct symbol reference. */
6667 ix86_delegitimize_address (rtx orig_x)
6671 if (GET_CODE (x) == MEM)
6676 if (GET_CODE (x) != CONST
6677 || GET_CODE (XEXP (x, 0)) != UNSPEC
6678 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6679 || GET_CODE (orig_x) != MEM)
6681 return XVECEXP (XEXP (x, 0), 0, 0);
6684 if (GET_CODE (x) != PLUS
6685 || GET_CODE (XEXP (x, 1)) != CONST)
6688 if (GET_CODE (XEXP (x, 0)) == REG
6689 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6690 /* %ebx + GOT/GOTOFF */
6692 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6694 /* %ebx + %reg * scale + GOT/GOTOFF */
6696 if (GET_CODE (XEXP (y, 0)) == REG
6697 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6699 else if (GET_CODE (XEXP (y, 1)) == REG
6700 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6704 if (GET_CODE (y) != REG
6705 && GET_CODE (y) != MULT
6706 && GET_CODE (y) != ASHIFT)
6712 x = XEXP (XEXP (x, 1), 0);
6713 if (GET_CODE (x) == UNSPEC
6714 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6715 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6718 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6719 return XVECEXP (x, 0, 0);
6722 if (GET_CODE (x) == PLUS
6723 && GET_CODE (XEXP (x, 0)) == UNSPEC
6724 && GET_CODE (XEXP (x, 1)) == CONST_INT
6725 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6726 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6727 && GET_CODE (orig_x) != MEM)))
6729 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6731 return gen_rtx_PLUS (Pmode, y, x);
6739 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6744 if (mode == CCFPmode || mode == CCFPUmode)
6746 enum rtx_code second_code, bypass_code;
6747 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6748 if (bypass_code != NIL || second_code != NIL)
6750 code = ix86_fp_compare_code_to_integer (code);
6754 code = reverse_condition (code);
6765 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6770 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6771 Those same assemblers have the same but opposite losage on cmov. */
6774 suffix = fp ? "nbe" : "a";
6777 if (mode == CCNOmode || mode == CCGOCmode)
6779 else if (mode == CCmode || mode == CCGCmode)
6790 if (mode == CCNOmode || mode == CCGOCmode)
6792 else if (mode == CCmode || mode == CCGCmode)
6801 suffix = fp ? "nb" : "ae";
6804 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6814 suffix = fp ? "u" : "p";
6817 suffix = fp ? "nu" : "np";
6822 fputs (suffix, file);
6826 print_reg (rtx x, int code, FILE *file)
6828 if (REGNO (x) == ARG_POINTER_REGNUM
6829 || REGNO (x) == FRAME_POINTER_REGNUM
6830 || REGNO (x) == FLAGS_REG
6831 || REGNO (x) == FPSR_REG)
6834 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6837 if (code == 'w' || MMX_REG_P (x))
6839 else if (code == 'b')
6841 else if (code == 'k')
6843 else if (code == 'q')
6845 else if (code == 'y')
6847 else if (code == 'h')
6850 code = GET_MODE_SIZE (GET_MODE (x));
6852 /* Irritatingly, AMD extended registers use different naming convention
6853 from the normal registers. */
6854 if (REX_INT_REG_P (x))
6861 error ("extended registers have no high halves");
6864 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6867 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6870 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6873 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6876 error ("unsupported operand size for extended register");
6884 if (STACK_TOP_P (x))
6886 fputs ("st(0)", file);
6893 if (! ANY_FP_REG_P (x))
6894 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6898 fputs (hi_reg_name[REGNO (x)], file);
6901 fputs (qi_reg_name[REGNO (x)], file);
6904 fputs (qi_high_reg_name[REGNO (x)], file);
6911 /* Locate some local-dynamic symbol still in use by this function
6912 so that we can print its name in some tls_local_dynamic_base
6916 get_some_local_dynamic_name (void)
6920 if (cfun->machine->some_ld_name)
6921 return cfun->machine->some_ld_name;
6923 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6925 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6926 return cfun->machine->some_ld_name;
6932 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6936 if (GET_CODE (x) == SYMBOL_REF
6937 && local_dynamic_symbolic_operand (x, Pmode))
6939 cfun->machine->some_ld_name = XSTR (x, 0);
6947 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6948 C -- print opcode suffix for set/cmov insn.
6949 c -- like C, but print reversed condition
6950 F,f -- likewise, but for floating-point.
6951 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6953 R -- print the prefix for register names.
6954 z -- print the opcode suffix for the size of the current operand.
6955 * -- print a star (in certain assembler syntax)
6956 A -- print an absolute memory reference.
6957 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6958 s -- print a shift double count, followed by the assemblers argument
6960 b -- print the QImode name of the register for the indicated operand.
6961 %b0 would print %al if operands[0] is reg 0.
6962 w -- likewise, print the HImode name of the register.
6963 k -- likewise, print the SImode name of the register.
6964 q -- likewise, print the DImode name of the register.
6965 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6966 y -- print "st(0)" instead of "st" as a register.
6967 D -- print condition for SSE cmp instruction.
6968 P -- if PIC, print an @PLT suffix.
6969 X -- don't print any sort of PIC '@' suffix for a symbol.
6970 & -- print some in-use local-dynamic symbol name.
6974 print_operand (FILE *file, rtx x, int code)
6981 if (ASSEMBLER_DIALECT == ASM_ATT)
6986 assemble_name (file, get_some_local_dynamic_name ());
6990 if (ASSEMBLER_DIALECT == ASM_ATT)
6992 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6994 /* Intel syntax. For absolute addresses, registers should not
6995 be surrounded by braces. */
6996 if (GET_CODE (x) != REG)
6999 PRINT_OPERAND (file, x, 0);
7007 PRINT_OPERAND (file, x, 0);
7012 if (ASSEMBLER_DIALECT == ASM_ATT)
7017 if (ASSEMBLER_DIALECT == ASM_ATT)
7022 if (ASSEMBLER_DIALECT == ASM_ATT)
7027 if (ASSEMBLER_DIALECT == ASM_ATT)
7032 if (ASSEMBLER_DIALECT == ASM_ATT)
7037 if (ASSEMBLER_DIALECT == ASM_ATT)
7042 /* 387 opcodes don't get size suffixes if the operands are
7044 if (STACK_REG_P (x))
7047 /* Likewise if using Intel opcodes. */
7048 if (ASSEMBLER_DIALECT == ASM_INTEL)
7051 /* This is the size of op from size of operand. */
7052 switch (GET_MODE_SIZE (GET_MODE (x)))
7055 #ifdef HAVE_GAS_FILDS_FISTS
7061 if (GET_MODE (x) == SFmode)
7076 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7078 #ifdef GAS_MNEMONICS
7104 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7106 PRINT_OPERAND (file, x, 0);
7112 /* Little bit of braindamage here. The SSE compare instructions
7113 does use completely different names for the comparisons that the
7114 fp conditional moves. */
7115 switch (GET_CODE (x))
7130 fputs ("unord", file);
7134 fputs ("neq", file);
7138 fputs ("nlt", file);
7142 fputs ("nle", file);
7145 fputs ("ord", file);
7153 #ifdef CMOV_SUN_AS_SYNTAX
7154 if (ASSEMBLER_DIALECT == ASM_ATT)
7156 switch (GET_MODE (x))
7158 case HImode: putc ('w', file); break;
7160 case SFmode: putc ('l', file); break;
7162 case DFmode: putc ('q', file); break;
7170 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7173 #ifdef CMOV_SUN_AS_SYNTAX
7174 if (ASSEMBLER_DIALECT == ASM_ATT)
7177 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7180 /* Like above, but reverse condition */
7182 /* Check to see if argument to %c is really a constant
7183 and not a condition code which needs to be reversed. */
7184 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7186 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7189 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7192 #ifdef CMOV_SUN_AS_SYNTAX
7193 if (ASSEMBLER_DIALECT == ASM_ATT)
7196 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7202 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7205 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7208 int pred_val = INTVAL (XEXP (x, 0));
7210 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7211 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7213 int taken = pred_val > REG_BR_PROB_BASE / 2;
7214 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7216 /* Emit hints only in the case default branch prediction
7217 heuristics would fail. */
7218 if (taken != cputaken)
7220 /* We use 3e (DS) prefix for taken branches and
7221 2e (CS) prefix for not taken branches. */
7223 fputs ("ds ; ", file);
7225 fputs ("cs ; ", file);
7232 output_operand_lossage ("invalid operand code `%c'", code);
7236 if (GET_CODE (x) == REG)
7238 PRINT_REG (x, code, file);
7241 else if (GET_CODE (x) == MEM)
7243 /* No `byte ptr' prefix for call instructions. */
7244 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7247 switch (GET_MODE_SIZE (GET_MODE (x)))
7249 case 1: size = "BYTE"; break;
7250 case 2: size = "WORD"; break;
7251 case 4: size = "DWORD"; break;
7252 case 8: size = "QWORD"; break;
7253 case 12: size = "XWORD"; break;
7254 case 16: size = "XMMWORD"; break;
7259 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7262 else if (code == 'w')
7264 else if (code == 'k')
7268 fputs (" PTR ", file);
7272 /* Avoid (%rip) for call operands. */
7273 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7274 && GET_CODE (x) != CONST_INT)
7275 output_addr_const (file, x);
7276 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7277 output_operand_lossage ("invalid constraints for operand");
7282 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7287 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7288 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7290 if (ASSEMBLER_DIALECT == ASM_ATT)
7292 fprintf (file, "0x%lx", l);
7295 /* These float cases don't actually occur as immediate operands. */
7296 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7300 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7301 fprintf (file, "%s", dstr);
7304 else if (GET_CODE (x) == CONST_DOUBLE
7305 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7309 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7310 fprintf (file, "%s", dstr);
7317 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7319 if (ASSEMBLER_DIALECT == ASM_ATT)
7322 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7323 || GET_CODE (x) == LABEL_REF)
7325 if (ASSEMBLER_DIALECT == ASM_ATT)
7328 fputs ("OFFSET FLAT:", file);
7331 if (GET_CODE (x) == CONST_INT)
7332 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7334 output_pic_addr_const (file, x, code);
7336 output_addr_const (file, x);
7340 /* Print a memory operand whose address is ADDR. */
7343 print_operand_address (FILE *file, register rtx addr)
7345 struct ix86_address parts;
7346 rtx base, index, disp;
7349 if (! ix86_decompose_address (addr, &parts))
7353 index = parts.index;
7355 scale = parts.scale;
7363 if (USER_LABEL_PREFIX[0] == 0)
7365 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7371 if (!base && !index)
7373 /* Displacement only requires special attention. */
7375 if (GET_CODE (disp) == CONST_INT)
7377 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7379 if (USER_LABEL_PREFIX[0] == 0)
7381 fputs ("ds:", file);
7383 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7386 output_pic_addr_const (file, disp, 0);
7388 output_addr_const (file, disp);
7390 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7392 && ((GET_CODE (disp) == SYMBOL_REF
7393 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7394 || GET_CODE (disp) == LABEL_REF
7395 || (GET_CODE (disp) == CONST
7396 && GET_CODE (XEXP (disp, 0)) == PLUS
7397 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7398 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7399 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7400 fputs ("(%rip)", file);
7404 if (ASSEMBLER_DIALECT == ASM_ATT)
7409 output_pic_addr_const (file, disp, 0);
7410 else if (GET_CODE (disp) == LABEL_REF)
7411 output_asm_label (disp);
7413 output_addr_const (file, disp);
7418 PRINT_REG (base, 0, file);
7422 PRINT_REG (index, 0, file);
7424 fprintf (file, ",%d", scale);
7430 rtx offset = NULL_RTX;
7434 /* Pull out the offset of a symbol; print any symbol itself. */
7435 if (GET_CODE (disp) == CONST
7436 && GET_CODE (XEXP (disp, 0)) == PLUS
7437 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7439 offset = XEXP (XEXP (disp, 0), 1);
7440 disp = gen_rtx_CONST (VOIDmode,
7441 XEXP (XEXP (disp, 0), 0));
7445 output_pic_addr_const (file, disp, 0);
7446 else if (GET_CODE (disp) == LABEL_REF)
7447 output_asm_label (disp);
7448 else if (GET_CODE (disp) == CONST_INT)
7451 output_addr_const (file, disp);
7457 PRINT_REG (base, 0, file);
7460 if (INTVAL (offset) >= 0)
7462 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7466 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7473 PRINT_REG (index, 0, file);
7475 fprintf (file, "*%d", scale);
7483 output_addr_const_extra (FILE *file, rtx x)
7487 if (GET_CODE (x) != UNSPEC)
7490 op = XVECEXP (x, 0, 0);
7491 switch (XINT (x, 1))
7493 case UNSPEC_GOTTPOFF:
7494 output_addr_const (file, op);
7495 /* FIXME: This might be @TPOFF in Sun ld. */
7496 fputs ("@GOTTPOFF", file);
7499 output_addr_const (file, op);
7500 fputs ("@TPOFF", file);
7503 output_addr_const (file, op);
7505 fputs ("@TPOFF", file);
7507 fputs ("@NTPOFF", file);
7510 output_addr_const (file, op);
7511 fputs ("@DTPOFF", file);
7513 case UNSPEC_GOTNTPOFF:
7514 output_addr_const (file, op);
7516 fputs ("@GOTTPOFF(%rip)", file);
7518 fputs ("@GOTNTPOFF", file);
7520 case UNSPEC_INDNTPOFF:
7521 output_addr_const (file, op);
7522 fputs ("@INDNTPOFF", file);
7532 /* Split one or more DImode RTL references into pairs of SImode
7533 references. The RTL can be REG, offsettable MEM, integer constant, or
7534 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7535 split and "num" is its length. lo_half and hi_half are output arrays
7536 that parallel "operands". */
7539 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7543 rtx op = operands[num];
7545 /* simplify_subreg refuse to split volatile memory addresses,
7546 but we still have to handle it. */
7547 if (GET_CODE (op) == MEM)
7549 lo_half[num] = adjust_address (op, SImode, 0);
7550 hi_half[num] = adjust_address (op, SImode, 4);
7554 lo_half[num] = simplify_gen_subreg (SImode, op,
7555 GET_MODE (op) == VOIDmode
7556 ? DImode : GET_MODE (op), 0);
7557 hi_half[num] = simplify_gen_subreg (SImode, op,
7558 GET_MODE (op) == VOIDmode
7559 ? DImode : GET_MODE (op), 4);
7563 /* Split one or more TImode RTL references into pairs of SImode
7564 references. The RTL can be REG, offsettable MEM, integer constant, or
7565 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7566 split and "num" is its length. lo_half and hi_half are output arrays
7567 that parallel "operands". */
7570 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7574 rtx op = operands[num];
7576 /* simplify_subreg refuse to split volatile memory addresses, but we
7577 still have to handle it. */
7578 if (GET_CODE (op) == MEM)
7580 lo_half[num] = adjust_address (op, DImode, 0);
7581 hi_half[num] = adjust_address (op, DImode, 8);
7585 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7586 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7591 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7592 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7593 is the expression of the binary operation. The output may either be
7594 emitted here, or returned to the caller, like all output_* functions.
7596 There is no guarantee that the operands are the same mode, as they
7597 might be within FLOAT or FLOAT_EXTEND expressions. */
7599 #ifndef SYSV386_COMPAT
7600 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7601 wants to fix the assemblers because that causes incompatibility
7602 with gcc. No-one wants to fix gcc because that causes
7603 incompatibility with assemblers... You can use the option of
7604 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7605 #define SYSV386_COMPAT 1
7609 output_387_binary_op (rtx insn, rtx *operands)
7611 static char buf[30];
7614 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7616 #ifdef ENABLE_CHECKING
7617 /* Even if we do not want to check the inputs, this documents input
7618 constraints. Which helps in understanding the following code. */
7619 if (STACK_REG_P (operands[0])
7620 && ((REG_P (operands[1])
7621 && REGNO (operands[0]) == REGNO (operands[1])
7622 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7623 || (REG_P (operands[2])
7624 && REGNO (operands[0]) == REGNO (operands[2])
7625 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7626 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7632 switch (GET_CODE (operands[3]))
7635 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7636 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7644 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7645 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7653 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7654 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7662 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7663 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7677 if (GET_MODE (operands[0]) == SFmode)
7678 strcat (buf, "ss\t{%2, %0|%0, %2}");
7680 strcat (buf, "sd\t{%2, %0|%0, %2}");
7685 switch (GET_CODE (operands[3]))
7689 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7691 rtx temp = operands[2];
7692 operands[2] = operands[1];
7696 /* know operands[0] == operands[1]. */
7698 if (GET_CODE (operands[2]) == MEM)
7704 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7706 if (STACK_TOP_P (operands[0]))
7707 /* How is it that we are storing to a dead operand[2]?
7708 Well, presumably operands[1] is dead too. We can't
7709 store the result to st(0) as st(0) gets popped on this
7710 instruction. Instead store to operands[2] (which I
7711 think has to be st(1)). st(1) will be popped later.
7712 gcc <= 2.8.1 didn't have this check and generated
7713 assembly code that the Unixware assembler rejected. */
7714 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7716 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7720 if (STACK_TOP_P (operands[0]))
7721 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7723 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7728 if (GET_CODE (operands[1]) == MEM)
7734 if (GET_CODE (operands[2]) == MEM)
7740 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7743 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7744 derived assemblers, confusingly reverse the direction of
7745 the operation for fsub{r} and fdiv{r} when the
7746 destination register is not st(0). The Intel assembler
7747 doesn't have this brain damage. Read !SYSV386_COMPAT to
7748 figure out what the hardware really does. */
7749 if (STACK_TOP_P (operands[0]))
7750 p = "{p\t%0, %2|rp\t%2, %0}";
7752 p = "{rp\t%2, %0|p\t%0, %2}";
7754 if (STACK_TOP_P (operands[0]))
7755 /* As above for fmul/fadd, we can't store to st(0). */
7756 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7758 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7763 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7766 if (STACK_TOP_P (operands[0]))
7767 p = "{rp\t%0, %1|p\t%1, %0}";
7769 p = "{p\t%1, %0|rp\t%0, %1}";
7771 if (STACK_TOP_P (operands[0]))
7772 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7774 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7779 if (STACK_TOP_P (operands[0]))
7781 if (STACK_TOP_P (operands[1]))
7782 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7784 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7787 else if (STACK_TOP_P (operands[1]))
7790 p = "{\t%1, %0|r\t%0, %1}";
7792 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7798 p = "{r\t%2, %0|\t%0, %2}";
7800 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7813 /* Output code to initialize control word copies used by
7814 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7815 is set to control word rounding downwards. */
7817 emit_i387_cw_initialization (rtx normal, rtx round_down)
7819 rtx reg = gen_reg_rtx (HImode);
7821 emit_insn (gen_x86_fnstcw_1 (normal));
7822 emit_move_insn (reg, normal);
7823 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7825 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7827 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7828 emit_move_insn (round_down, reg);
7831 /* Output code for INSN to convert a float to a signed int. OPERANDS
7832 are the insn operands. The output may be [HSD]Imode and the input
7833 operand may be [SDX]Fmode. */
7836 output_fix_trunc (rtx insn, rtx *operands)
7838 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7839 int dimode_p = GET_MODE (operands[0]) == DImode;
7841 /* Jump through a hoop or two for DImode, since the hardware has no
7842 non-popping instruction. We used to do this a different way, but
7843 that was somewhat fragile and broke with post-reload splitters. */
7844 if (dimode_p && !stack_top_dies)
7845 output_asm_insn ("fld\t%y1", operands);
7847 if (!STACK_TOP_P (operands[1]))
7850 if (GET_CODE (operands[0]) != MEM)
7853 output_asm_insn ("fldcw\t%3", operands);
7854 if (stack_top_dies || dimode_p)
7855 output_asm_insn ("fistp%z0\t%0", operands);
7857 output_asm_insn ("fist%z0\t%0", operands);
7858 output_asm_insn ("fldcw\t%2", operands);
7863 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7864 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7865 when fucom should be used. */
7868 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7871 rtx cmp_op0 = operands[0];
7872 rtx cmp_op1 = operands[1];
7873 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7878 cmp_op1 = operands[2];
7882 if (GET_MODE (operands[0]) == SFmode)
7884 return "ucomiss\t{%1, %0|%0, %1}";
7886 return "comiss\t{%1, %0|%0, %1}";
7889 return "ucomisd\t{%1, %0|%0, %1}";
7891 return "comisd\t{%1, %0|%0, %1}";
7894 if (! STACK_TOP_P (cmp_op0))
7897 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7899 if (STACK_REG_P (cmp_op1)
7901 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7902 && REGNO (cmp_op1) != FIRST_STACK_REG)
7904 /* If both the top of the 387 stack dies, and the other operand
7905 is also a stack register that dies, then this must be a
7906 `fcompp' float compare */
7910 /* There is no double popping fcomi variant. Fortunately,
7911 eflags is immune from the fstp's cc clobbering. */
7913 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7915 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7923 return "fucompp\n\tfnstsw\t%0";
7925 return "fcompp\n\tfnstsw\t%0";
7938 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7940 static const char * const alt[24] =
7952 "fcomi\t{%y1, %0|%0, %y1}",
7953 "fcomip\t{%y1, %0|%0, %y1}",
7954 "fucomi\t{%y1, %0|%0, %y1}",
7955 "fucomip\t{%y1, %0|%0, %y1}",
7962 "fcom%z2\t%y2\n\tfnstsw\t%0",
7963 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7964 "fucom%z2\t%y2\n\tfnstsw\t%0",
7965 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7967 "ficom%z2\t%y2\n\tfnstsw\t%0",
7968 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7976 mask = eflags_p << 3;
7977 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7978 mask |= unordered_p << 1;
7979 mask |= stack_top_dies;
7992 ix86_output_addr_vec_elt (FILE *file, int value)
7994 const char *directive = ASM_LONG;
7999 directive = ASM_QUAD;
8005 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8009 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8012 fprintf (file, "%s%s%d-%s%d\n",
8013 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8014 else if (HAVE_AS_GOTOFF_IN_DATA)
8015 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8017 else if (TARGET_MACHO)
8018 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8019 machopic_function_base_name () + 1);
8022 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8023 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8026 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8030 ix86_expand_clear (rtx dest)
8034 /* We play register width games, which are only valid after reload. */
8035 if (!reload_completed)
8038 /* Avoid HImode and its attendant prefix byte. */
8039 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8040 dest = gen_rtx_REG (SImode, REGNO (dest));
8042 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8044 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8045 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8047 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8048 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8054 /* X is an unchanging MEM. If it is a constant pool reference, return
8055 the constant pool rtx, else NULL. */
8058 maybe_get_pool_constant (rtx x)
8060 x = ix86_delegitimize_address (XEXP (x, 0));
8062 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8063 return get_pool_constant (x);
8069 ix86_expand_move (enum machine_mode mode, rtx operands[])
8071 int strict = (reload_in_progress || reload_completed);
8073 enum tls_model model;
8078 model = tls_symbolic_operand (op1, Pmode);
8081 op1 = legitimize_tls_address (op1, model, true);
8082 op1 = force_operand (op1, op0);
8087 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8092 rtx temp = ((reload_in_progress
8093 || ((op0 && GET_CODE (op0) == REG)
8095 ? op0 : gen_reg_rtx (Pmode));
8096 op1 = machopic_indirect_data_reference (op1, temp);
8097 op1 = machopic_legitimize_pic_address (op1, mode,
8098 temp == op1 ? 0 : temp);
8100 else if (MACHOPIC_INDIRECT)
8101 op1 = machopic_indirect_data_reference (op1, 0);
8105 if (GET_CODE (op0) == MEM)
8106 op1 = force_reg (Pmode, op1);
8110 if (GET_CODE (temp) != REG)
8111 temp = gen_reg_rtx (Pmode);
8112 temp = legitimize_pic_address (op1, temp);
8117 #endif /* TARGET_MACHO */
8121 if (GET_CODE (op0) == MEM
8122 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8123 || !push_operand (op0, mode))
8124 && GET_CODE (op1) == MEM)
8125 op1 = force_reg (mode, op1);
8127 if (push_operand (op0, mode)
8128 && ! general_no_elim_operand (op1, mode))
8129 op1 = copy_to_mode_reg (mode, op1);
8131 /* Force large constants in 64bit compilation into register
8132 to get them CSEed. */
8133 if (TARGET_64BIT && mode == DImode
8134 && immediate_operand (op1, mode)
8135 && !x86_64_zero_extended_value (op1)
8136 && !register_operand (op0, mode)
8137 && optimize && !reload_completed && !reload_in_progress)
8138 op1 = copy_to_mode_reg (mode, op1);
8140 if (FLOAT_MODE_P (mode))
8142 /* If we are loading a floating point constant to a register,
8143 force the value to memory now, since we'll get better code
8144 out the back end. */
8148 else if (GET_CODE (op1) == CONST_DOUBLE)
8150 op1 = validize_mem (force_const_mem (mode, op1));
8151 if (!register_operand (op0, mode))
8153 rtx temp = gen_reg_rtx (mode);
8154 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8155 emit_move_insn (op0, temp);
8162 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8166 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8168 /* Force constants other than zero into memory. We do not know how
8169 the instructions used to build constants modify the upper 64 bits
8170 of the register, once we have that information we may be able
8171 to handle some of them more efficiently. */
8172 if ((reload_in_progress | reload_completed) == 0
8173 && register_operand (operands[0], mode)
8174 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8175 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8177 /* Make operand1 a register if it isn't already. */
8179 && !register_operand (operands[0], mode)
8180 && !register_operand (operands[1], mode))
8182 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8183 emit_move_insn (operands[0], temp);
8187 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8190 /* Attempt to expand a binary operator. Make the expansion closer to the
8191 actual machine, then just general_operand, which will allow 3 separate
8192 memory references (one output, two input) in a single insn. */
8195 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8198 int matching_memory;
8199 rtx src1, src2, dst, op, clob;
8205 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8206 if (GET_RTX_CLASS (code) == 'c'
8207 && (rtx_equal_p (dst, src2)
8208 || immediate_operand (src1, mode)))
8215 /* If the destination is memory, and we do not have matching source
8216 operands, do things in registers. */
8217 matching_memory = 0;
8218 if (GET_CODE (dst) == MEM)
8220 if (rtx_equal_p (dst, src1))
8221 matching_memory = 1;
8222 else if (GET_RTX_CLASS (code) == 'c'
8223 && rtx_equal_p (dst, src2))
8224 matching_memory = 2;
8226 dst = gen_reg_rtx (mode);
8229 /* Both source operands cannot be in memory. */
8230 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8232 if (matching_memory != 2)
8233 src2 = force_reg (mode, src2);
8235 src1 = force_reg (mode, src1);
8238 /* If the operation is not commutable, source 1 cannot be a constant
8239 or non-matching memory. */
8240 if ((CONSTANT_P (src1)
8241 || (!matching_memory && GET_CODE (src1) == MEM))
8242 && GET_RTX_CLASS (code) != 'c')
8243 src1 = force_reg (mode, src1);
8245 /* If optimizing, copy to regs to improve CSE */
8246 if (optimize && ! no_new_pseudos)
8248 if (GET_CODE (dst) == MEM)
8249 dst = gen_reg_rtx (mode);
8250 if (GET_CODE (src1) == MEM)
8251 src1 = force_reg (mode, src1);
8252 if (GET_CODE (src2) == MEM)
8253 src2 = force_reg (mode, src2);
8256 /* Emit the instruction. */
8258 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8259 if (reload_in_progress)
8261 /* Reload doesn't know about the flags register, and doesn't know that
8262 it doesn't want to clobber it. We can only do this with PLUS. */
8269 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8270 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8273 /* Fix up the destination if needed. */
8274 if (dst != operands[0])
8275 emit_move_insn (operands[0], dst);
8278 /* Return TRUE or FALSE depending on whether the binary operator meets the
8279 appropriate constraints. */
8282 ix86_binary_operator_ok (enum rtx_code code,
8283 enum machine_mode mode ATTRIBUTE_UNUSED,
8286 /* Both source operands cannot be in memory. */
8287 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8289 /* If the operation is not commutable, source 1 cannot be a constant. */
8290 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8292 /* If the destination is memory, we must have a matching source operand. */
8293 if (GET_CODE (operands[0]) == MEM
8294 && ! (rtx_equal_p (operands[0], operands[1])
8295 || (GET_RTX_CLASS (code) == 'c'
8296 && rtx_equal_p (operands[0], operands[2]))))
8298 /* If the operation is not commutable and the source 1 is memory, we must
8299 have a matching destination. */
8300 if (GET_CODE (operands[1]) == MEM
8301 && GET_RTX_CLASS (code) != 'c'
8302 && ! rtx_equal_p (operands[0], operands[1]))
8307 /* Attempt to expand a unary operator. Make the expansion closer to the
8308 actual machine, then just general_operand, which will allow 2 separate
8309 memory references (one output, one input) in a single insn. */
8312 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8315 int matching_memory;
8316 rtx src, dst, op, clob;
8321 /* If the destination is memory, and we do not have matching source
8322 operands, do things in registers. */
8323 matching_memory = 0;
8324 if (GET_CODE (dst) == MEM)
8326 if (rtx_equal_p (dst, src))
8327 matching_memory = 1;
8329 dst = gen_reg_rtx (mode);
8332 /* When source operand is memory, destination must match. */
8333 if (!matching_memory && GET_CODE (src) == MEM)
8334 src = force_reg (mode, src);
8336 /* If optimizing, copy to regs to improve CSE */
8337 if (optimize && ! no_new_pseudos)
8339 if (GET_CODE (dst) == MEM)
8340 dst = gen_reg_rtx (mode);
8341 if (GET_CODE (src) == MEM)
8342 src = force_reg (mode, src);
8345 /* Emit the instruction. */
8347 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8348 if (reload_in_progress || code == NOT)
8350 /* Reload doesn't know about the flags register, and doesn't know that
8351 it doesn't want to clobber it. */
8358 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8359 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8362 /* Fix up the destination if needed. */
8363 if (dst != operands[0])
8364 emit_move_insn (operands[0], dst);
8367 /* Return TRUE or FALSE depending on whether the unary operator meets the
8368 appropriate constraints. */
8371 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8372 enum machine_mode mode ATTRIBUTE_UNUSED,
8373 rtx operands[2] ATTRIBUTE_UNUSED)
8375 /* If one of operands is memory, source and destination must match. */
8376 if ((GET_CODE (operands[0]) == MEM
8377 || GET_CODE (operands[1]) == MEM)
8378 && ! rtx_equal_p (operands[0], operands[1]))
8383 /* Return TRUE or FALSE depending on whether the first SET in INSN
8384 has source and destination with matching CC modes, and that the
8385 CC mode is at least as constrained as REQ_MODE. */
8388 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8391 enum machine_mode set_mode;
8393 set = PATTERN (insn);
8394 if (GET_CODE (set) == PARALLEL)
8395 set = XVECEXP (set, 0, 0);
8396 if (GET_CODE (set) != SET)
8398 if (GET_CODE (SET_SRC (set)) != COMPARE)
8401 set_mode = GET_MODE (SET_DEST (set));
8405 if (req_mode != CCNOmode
8406 && (req_mode != CCmode
8407 || XEXP (SET_SRC (set), 1) != const0_rtx))
8411 if (req_mode == CCGCmode)
8415 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8419 if (req_mode == CCZmode)
8429 return (GET_MODE (SET_SRC (set)) == set_mode);
8432 /* Generate insn patterns to do an integer compare of OPERANDS. */
8435 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8437 enum machine_mode cmpmode;
8440 cmpmode = SELECT_CC_MODE (code, op0, op1);
8441 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8443 /* This is very simple, but making the interface the same as in the
8444 FP case makes the rest of the code easier. */
8445 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8446 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8448 /* Return the test that should be put into the flags user, i.e.
8449 the bcc, scc, or cmov instruction. */
8450 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8453 /* Figure out whether to use ordered or unordered fp comparisons.
8454 Return the appropriate mode to use. */
8457 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8459 /* ??? In order to make all comparisons reversible, we do all comparisons
8460 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8461 all forms trapping and nontrapping comparisons, we can make inequality
8462 comparisons trapping again, since it results in better code when using
8463 FCOM based compares. */
8464 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8468 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8470 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8471 return ix86_fp_compare_mode (code);
8474 /* Only zero flag is needed. */
8476 case NE: /* ZF!=0 */
8478 /* Codes needing carry flag. */
8479 case GEU: /* CF=0 */
8480 case GTU: /* CF=0 & ZF=0 */
8481 case LTU: /* CF=1 */
8482 case LEU: /* CF=1 | ZF=1 */
8484 /* Codes possibly doable only with sign flag when
8485 comparing against zero. */
8486 case GE: /* SF=OF or SF=0 */
8487 case LT: /* SF<>OF or SF=1 */
8488 if (op1 == const0_rtx)
8491 /* For other cases Carry flag is not required. */
8493 /* Codes doable only with sign flag when comparing
8494 against zero, but we miss jump instruction for it
8495 so we need to use relational tests against overflow
8496 that thus needs to be zero. */
8497 case GT: /* ZF=0 & SF=OF */
8498 case LE: /* ZF=1 | SF<>OF */
8499 if (op1 == const0_rtx)
8503 /* strcmp pattern do (use flags) and combine may ask us for proper
8512 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8515 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8517 enum rtx_code swapped_code = swap_condition (code);
8518 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8519 || (ix86_fp_comparison_cost (swapped_code)
8520 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8523 /* Swap, force into registers, or otherwise massage the two operands
8524 to a fp comparison. The operands are updated in place; the new
8525 comparison code is returned. */
8527 static enum rtx_code
8528 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8530 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8531 rtx op0 = *pop0, op1 = *pop1;
8532 enum machine_mode op_mode = GET_MODE (op0);
8533 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8535 /* All of the unordered compare instructions only work on registers.
8536 The same is true of the XFmode compare instructions. The same is
8537 true of the fcomi compare instructions. */
8540 && (fpcmp_mode == CCFPUmode
8541 || op_mode == XFmode
8542 || op_mode == TFmode
8543 || ix86_use_fcomi_compare (code)))
8545 op0 = force_reg (op_mode, op0);
8546 op1 = force_reg (op_mode, op1);
8550 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8551 things around if they appear profitable, otherwise force op0
8554 if (standard_80387_constant_p (op0) == 0
8555 || (GET_CODE (op0) == MEM
8556 && ! (standard_80387_constant_p (op1) == 0
8557 || GET_CODE (op1) == MEM)))
8560 tmp = op0, op0 = op1, op1 = tmp;
8561 code = swap_condition (code);
8564 if (GET_CODE (op0) != REG)
8565 op0 = force_reg (op_mode, op0);
8567 if (CONSTANT_P (op1))
8569 if (standard_80387_constant_p (op1))
8570 op1 = force_reg (op_mode, op1);
8572 op1 = validize_mem (force_const_mem (op_mode, op1));
8576 /* Try to rearrange the comparison to make it cheaper. */
8577 if (ix86_fp_comparison_cost (code)
8578 > ix86_fp_comparison_cost (swap_condition (code))
8579 && (GET_CODE (op1) == REG || !no_new_pseudos))
8582 tmp = op0, op0 = op1, op1 = tmp;
8583 code = swap_condition (code);
8584 if (GET_CODE (op0) != REG)
8585 op0 = force_reg (op_mode, op0);
8593 /* Convert comparison codes we use to represent FP comparison to integer
8594 code that will result in proper branch. Return UNKNOWN if no such code
8596 static enum rtx_code
8597 ix86_fp_compare_code_to_integer (enum rtx_code code)
8626 /* Split comparison code CODE into comparisons we can do using branch
8627 instructions. BYPASS_CODE is comparison code for branch that will
8628 branch around FIRST_CODE and SECOND_CODE. If some of branches
8629 is not required, set value to NIL.
8630 We never require more than two branches. */
8632 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8633 enum rtx_code *first_code,
8634 enum rtx_code *second_code)
8640 /* The fcomi comparison sets flags as follows:
8650 case GT: /* GTU - CF=0 & ZF=0 */
8651 case GE: /* GEU - CF=0 */
8652 case ORDERED: /* PF=0 */
8653 case UNORDERED: /* PF=1 */
8654 case UNEQ: /* EQ - ZF=1 */
8655 case UNLT: /* LTU - CF=1 */
8656 case UNLE: /* LEU - CF=1 | ZF=1 */
8657 case LTGT: /* EQ - ZF=0 */
8659 case LT: /* LTU - CF=1 - fails on unordered */
8661 *bypass_code = UNORDERED;
8663 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8665 *bypass_code = UNORDERED;
8667 case EQ: /* EQ - ZF=1 - fails on unordered */
8669 *bypass_code = UNORDERED;
8671 case NE: /* NE - ZF=0 - fails on unordered */
8673 *second_code = UNORDERED;
8675 case UNGE: /* GEU - CF=0 - fails on unordered */
8677 *second_code = UNORDERED;
8679 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8681 *second_code = UNORDERED;
8686 if (!TARGET_IEEE_FP)
8693 /* Return cost of comparison done fcom + arithmetics operations on AX.
8694 All following functions do use number of instructions as a cost metrics.
8695 In future this should be tweaked to compute bytes for optimize_size and
8696 take into account performance of various instructions on various CPUs. */
8698 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8700 if (!TARGET_IEEE_FP)
8702 /* The cost of code output by ix86_expand_fp_compare. */
8730 /* Return cost of comparison done using fcomi operation.
8731 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8733 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8735 enum rtx_code bypass_code, first_code, second_code;
8736 /* Return arbitrarily high cost when instruction is not supported - this
8737 prevents gcc from using it. */
8740 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8741 return (bypass_code != NIL || second_code != NIL) + 2;
8744 /* Return cost of comparison done using sahf operation.
8745 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8747 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8749 enum rtx_code bypass_code, first_code, second_code;
8750 /* Return arbitrarily high cost when instruction is not preferred - this
8751 avoids gcc from using it. */
8752 if (!TARGET_USE_SAHF && !optimize_size)
8754 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8755 return (bypass_code != NIL || second_code != NIL) + 3;
8758 /* Compute cost of the comparison done using any method.
8759 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8761 ix86_fp_comparison_cost (enum rtx_code code)
8763 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8766 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8767 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8769 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8770 if (min > sahf_cost)
8772 if (min > fcomi_cost)
8777 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8780 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8781 rtx *second_test, rtx *bypass_test)
8783 enum machine_mode fpcmp_mode, intcmp_mode;
8785 int cost = ix86_fp_comparison_cost (code);
8786 enum rtx_code bypass_code, first_code, second_code;
8788 fpcmp_mode = ix86_fp_compare_mode (code);
8789 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8792 *second_test = NULL_RTX;
8794 *bypass_test = NULL_RTX;
8796 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8798 /* Do fcomi/sahf based test when profitable. */
8799 if ((bypass_code == NIL || bypass_test)
8800 && (second_code == NIL || second_test)
8801 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8805 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8806 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8812 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8813 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8815 scratch = gen_reg_rtx (HImode);
8816 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8817 emit_insn (gen_x86_sahf_1 (scratch));
8820 /* The FP codes work out to act like unsigned. */
8821 intcmp_mode = fpcmp_mode;
8823 if (bypass_code != NIL)
8824 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8825 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8827 if (second_code != NIL)
8828 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8829 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8834 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8835 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8836 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8838 scratch = gen_reg_rtx (HImode);
8839 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8841 /* In the unordered case, we have to check C2 for NaN's, which
8842 doesn't happen to work out to anything nice combination-wise.
8843 So do some bit twiddling on the value we've got in AH to come
8844 up with an appropriate set of condition codes. */
8846 intcmp_mode = CCNOmode;
8851 if (code == GT || !TARGET_IEEE_FP)
8853 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8858 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8859 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8860 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8861 intcmp_mode = CCmode;
8867 if (code == LT && TARGET_IEEE_FP)
8869 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8870 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8871 intcmp_mode = CCmode;
8876 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8882 if (code == GE || !TARGET_IEEE_FP)
8884 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8889 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8890 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8897 if (code == LE && TARGET_IEEE_FP)
8899 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8900 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8901 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8902 intcmp_mode = CCmode;
8907 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8913 if (code == EQ && TARGET_IEEE_FP)
8915 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8916 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8917 intcmp_mode = CCmode;
8922 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8929 if (code == NE && TARGET_IEEE_FP)
8931 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8932 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8938 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8944 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8948 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8957 /* Return the test that should be put into the flags user, i.e.
8958 the bcc, scc, or cmov instruction. */
8959 return gen_rtx_fmt_ee (code, VOIDmode,
8960 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8965 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8968 op0 = ix86_compare_op0;
8969 op1 = ix86_compare_op1;
8972 *second_test = NULL_RTX;
8974 *bypass_test = NULL_RTX;
8976 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8977 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8978 second_test, bypass_test);
8980 ret = ix86_expand_int_compare (code, op0, op1);
8985 /* Return true if the CODE will result in nontrivial jump sequence. */
8987 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8989 enum rtx_code bypass_code, first_code, second_code;
8992 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8993 return bypass_code != NIL || second_code != NIL;
8997 ix86_expand_branch (enum rtx_code code, rtx label)
9001 switch (GET_MODE (ix86_compare_op0))
9007 tmp = ix86_expand_compare (code, NULL, NULL);
9008 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9009 gen_rtx_LABEL_REF (VOIDmode, label),
9011 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9021 enum rtx_code bypass_code, first_code, second_code;
9023 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9026 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9028 /* Check whether we will use the natural sequence with one jump. If
9029 so, we can expand jump early. Otherwise delay expansion by
9030 creating compound insn to not confuse optimizers. */
9031 if (bypass_code == NIL && second_code == NIL
9034 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9035 gen_rtx_LABEL_REF (VOIDmode, label),
9040 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9041 ix86_compare_op0, ix86_compare_op1);
9042 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9043 gen_rtx_LABEL_REF (VOIDmode, label),
9045 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9047 use_fcomi = ix86_use_fcomi_compare (code);
9048 vec = rtvec_alloc (3 + !use_fcomi);
9049 RTVEC_ELT (vec, 0) = tmp;
9051 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9053 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9056 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9058 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9066 /* Expand DImode branch into multiple compare+branch. */
9068 rtx lo[2], hi[2], label2;
9069 enum rtx_code code1, code2, code3;
9071 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9073 tmp = ix86_compare_op0;
9074 ix86_compare_op0 = ix86_compare_op1;
9075 ix86_compare_op1 = tmp;
9076 code = swap_condition (code);
9078 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9079 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9081 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9082 avoid two branches. This costs one extra insn, so disable when
9083 optimizing for size. */
9085 if ((code == EQ || code == NE)
9087 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9092 if (hi[1] != const0_rtx)
9093 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9094 NULL_RTX, 0, OPTAB_WIDEN);
9097 if (lo[1] != const0_rtx)
9098 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9099 NULL_RTX, 0, OPTAB_WIDEN);
9101 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9102 NULL_RTX, 0, OPTAB_WIDEN);
9104 ix86_compare_op0 = tmp;
9105 ix86_compare_op1 = const0_rtx;
9106 ix86_expand_branch (code, label);
9110 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9111 op1 is a constant and the low word is zero, then we can just
9112 examine the high word. */
9114 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9117 case LT: case LTU: case GE: case GEU:
9118 ix86_compare_op0 = hi[0];
9119 ix86_compare_op1 = hi[1];
9120 ix86_expand_branch (code, label);
9126 /* Otherwise, we need two or three jumps. */
9128 label2 = gen_label_rtx ();
9131 code2 = swap_condition (code);
9132 code3 = unsigned_condition (code);
9136 case LT: case GT: case LTU: case GTU:
9139 case LE: code1 = LT; code2 = GT; break;
9140 case GE: code1 = GT; code2 = LT; break;
9141 case LEU: code1 = LTU; code2 = GTU; break;
9142 case GEU: code1 = GTU; code2 = LTU; break;
9144 case EQ: code1 = NIL; code2 = NE; break;
9145 case NE: code2 = NIL; break;
9153 * if (hi(a) < hi(b)) goto true;
9154 * if (hi(a) > hi(b)) goto false;
9155 * if (lo(a) < lo(b)) goto true;
9159 ix86_compare_op0 = hi[0];
9160 ix86_compare_op1 = hi[1];
9163 ix86_expand_branch (code1, label);
9165 ix86_expand_branch (code2, label2);
9167 ix86_compare_op0 = lo[0];
9168 ix86_compare_op1 = lo[1];
9169 ix86_expand_branch (code3, label);
9172 emit_label (label2);
9181 /* Split branch based on floating point condition. */
9183 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9184 rtx target1, rtx target2, rtx tmp)
9187 rtx label = NULL_RTX;
9189 int bypass_probability = -1, second_probability = -1, probability = -1;
9192 if (target2 != pc_rtx)
9195 code = reverse_condition_maybe_unordered (code);
9200 condition = ix86_expand_fp_compare (code, op1, op2,
9201 tmp, &second, &bypass);
9203 if (split_branch_probability >= 0)
9205 /* Distribute the probabilities across the jumps.
9206 Assume the BYPASS and SECOND to be always test
9208 probability = split_branch_probability;
9210 /* Value of 1 is low enough to make no need for probability
9211 to be updated. Later we may run some experiments and see
9212 if unordered values are more frequent in practice. */
9214 bypass_probability = 1;
9216 second_probability = 1;
9218 if (bypass != NULL_RTX)
9220 label = gen_label_rtx ();
9221 i = emit_jump_insn (gen_rtx_SET
9223 gen_rtx_IF_THEN_ELSE (VOIDmode,
9225 gen_rtx_LABEL_REF (VOIDmode,
9228 if (bypass_probability >= 0)
9230 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9231 GEN_INT (bypass_probability),
9234 i = emit_jump_insn (gen_rtx_SET
9236 gen_rtx_IF_THEN_ELSE (VOIDmode,
9237 condition, target1, target2)));
9238 if (probability >= 0)
9240 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9241 GEN_INT (probability),
9243 if (second != NULL_RTX)
9245 i = emit_jump_insn (gen_rtx_SET
9247 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9249 if (second_probability >= 0)
9251 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9252 GEN_INT (second_probability),
9255 if (label != NULL_RTX)
9260 ix86_expand_setcc (enum rtx_code code, rtx dest)
9262 rtx ret, tmp, tmpreg;
9263 rtx second_test, bypass_test;
9265 if (GET_MODE (ix86_compare_op0) == DImode
9267 return 0; /* FAIL */
9269 if (GET_MODE (dest) != QImode)
9272 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9273 PUT_MODE (ret, QImode);
9278 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9279 if (bypass_test || second_test)
9281 rtx test = second_test;
9283 rtx tmp2 = gen_reg_rtx (QImode);
9290 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9292 PUT_MODE (test, QImode);
9293 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9296 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9298 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9301 return 1; /* DONE */
9304 /* Expand comparison setting or clearing carry flag. Return true when successful
9305 and set pop for the operation. */
9307 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9309 enum machine_mode mode =
9310 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9312 /* Do not handle DImode compares that go trought special path. Also we can't
9313 deal with FP compares yet. This is possible to add. */
9314 if ((mode == DImode && !TARGET_64BIT))
9316 if (FLOAT_MODE_P (mode))
9318 rtx second_test = NULL, bypass_test = NULL;
9319 rtx compare_op, compare_seq;
9321 /* Shortcut: following common codes never translate into carry flag compares. */
9322 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9323 || code == ORDERED || code == UNORDERED)
9326 /* These comparisons require zero flag; swap operands so they won't. */
9327 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9333 code = swap_condition (code);
9336 /* Try to expand the comparison and verify that we end up with carry flag
9337 based comparison. This is fails to be true only when we decide to expand
9338 comparison using arithmetic that is not too common scenario. */
9340 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9341 &second_test, &bypass_test);
9342 compare_seq = get_insns ();
9345 if (second_test || bypass_test)
9347 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9348 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9349 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9351 code = GET_CODE (compare_op);
9352 if (code != LTU && code != GEU)
9354 emit_insn (compare_seq);
9358 if (!INTEGRAL_MODE_P (mode))
9366 /* Convert a==0 into (unsigned)a<1. */
9369 if (op1 != const0_rtx)
9372 code = (code == EQ ? LTU : GEU);
9375 /* Convert a>b into b<a or a>=b-1. */
9378 if (GET_CODE (op1) == CONST_INT)
9380 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9381 /* Bail out on overflow. We still can swap operands but that
9382 would force loading of the constant into register. */
9383 if (op1 == const0_rtx
9384 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9386 code = (code == GTU ? GEU : LTU);
9393 code = (code == GTU ? LTU : GEU);
9397 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9400 if (mode == DImode || op1 != const0_rtx)
9402 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9403 code = (code == LT ? GEU : LTU);
9407 if (mode == DImode || op1 != constm1_rtx)
9409 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9410 code = (code == LE ? GEU : LTU);
9416 ix86_compare_op0 = op0;
9417 ix86_compare_op1 = op1;
9418 *pop = ix86_expand_compare (code, NULL, NULL);
9419 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9425 ix86_expand_int_movcc (rtx operands[])
9427 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9428 rtx compare_seq, compare_op;
9429 rtx second_test, bypass_test;
9430 enum machine_mode mode = GET_MODE (operands[0]);
9431 bool sign_bit_compare_p = false;;
9434 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9435 compare_seq = get_insns ();
9438 compare_code = GET_CODE (compare_op);
9440 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9441 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9442 sign_bit_compare_p = true;
9444 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9445 HImode insns, we'd be swallowed in word prefix ops. */
9447 if ((mode != HImode || TARGET_FAST_PREFIX)
9448 && (mode != DImode || TARGET_64BIT)
9449 && GET_CODE (operands[2]) == CONST_INT
9450 && GET_CODE (operands[3]) == CONST_INT)
9452 rtx out = operands[0];
9453 HOST_WIDE_INT ct = INTVAL (operands[2]);
9454 HOST_WIDE_INT cf = INTVAL (operands[3]);
9458 /* Sign bit compares are better done using shifts than we do by using
9460 if (sign_bit_compare_p
9461 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9462 ix86_compare_op1, &compare_op))
9464 /* Detect overlap between destination and compare sources. */
9467 if (!sign_bit_compare_p)
9471 compare_code = GET_CODE (compare_op);
9473 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9474 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9477 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9480 /* To simplify rest of code, restrict to the GEU case. */
9481 if (compare_code == LTU)
9483 HOST_WIDE_INT tmp = ct;
9486 compare_code = reverse_condition (compare_code);
9487 code = reverse_condition (code);
9492 PUT_CODE (compare_op,
9493 reverse_condition_maybe_unordered
9494 (GET_CODE (compare_op)));
9496 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9500 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9501 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9502 tmp = gen_reg_rtx (mode);
9505 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9507 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9511 if (code == GT || code == GE)
9512 code = reverse_condition (code);
9515 HOST_WIDE_INT tmp = ct;
9520 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9521 ix86_compare_op1, VOIDmode, 0, -1);
9534 tmp = expand_simple_binop (mode, PLUS,
9536 copy_rtx (tmp), 1, OPTAB_DIRECT);
9547 tmp = expand_simple_binop (mode, IOR,
9549 copy_rtx (tmp), 1, OPTAB_DIRECT);
9551 else if (diff == -1 && ct)
9561 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9563 tmp = expand_simple_binop (mode, PLUS,
9564 copy_rtx (tmp), GEN_INT (cf),
9565 copy_rtx (tmp), 1, OPTAB_DIRECT);
9573 * andl cf - ct, dest
9583 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9586 tmp = expand_simple_binop (mode, AND,
9588 gen_int_mode (cf - ct, mode),
9589 copy_rtx (tmp), 1, OPTAB_DIRECT);
9591 tmp = expand_simple_binop (mode, PLUS,
9592 copy_rtx (tmp), GEN_INT (ct),
9593 copy_rtx (tmp), 1, OPTAB_DIRECT);
9596 if (!rtx_equal_p (tmp, out))
9597 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9599 return 1; /* DONE */
9605 tmp = ct, ct = cf, cf = tmp;
9607 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9609 /* We may be reversing unordered compare to normal compare, that
9610 is not valid in general (we may convert non-trapping condition
9611 to trapping one), however on i386 we currently emit all
9612 comparisons unordered. */
9613 compare_code = reverse_condition_maybe_unordered (compare_code);
9614 code = reverse_condition_maybe_unordered (code);
9618 compare_code = reverse_condition (compare_code);
9619 code = reverse_condition (code);
9624 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9625 && GET_CODE (ix86_compare_op1) == CONST_INT)
9627 if (ix86_compare_op1 == const0_rtx
9628 && (code == LT || code == GE))
9629 compare_code = code;
9630 else if (ix86_compare_op1 == constm1_rtx)
9634 else if (code == GT)
9639 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9640 if (compare_code != NIL
9641 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9642 && (cf == -1 || ct == -1))
9644 /* If lea code below could be used, only optimize
9645 if it results in a 2 insn sequence. */
9647 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9648 || diff == 3 || diff == 5 || diff == 9)
9649 || (compare_code == LT && ct == -1)
9650 || (compare_code == GE && cf == -1))
9653 * notl op1 (if necessary)
9661 code = reverse_condition (code);
9664 out = emit_store_flag (out, code, ix86_compare_op0,
9665 ix86_compare_op1, VOIDmode, 0, -1);
9667 out = expand_simple_binop (mode, IOR,
9669 out, 1, OPTAB_DIRECT);
9670 if (out != operands[0])
9671 emit_move_insn (operands[0], out);
9673 return 1; /* DONE */
9678 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9679 || diff == 3 || diff == 5 || diff == 9)
9680 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9681 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9687 * lea cf(dest*(ct-cf)),dest
9691 * This also catches the degenerate setcc-only case.
9697 out = emit_store_flag (out, code, ix86_compare_op0,
9698 ix86_compare_op1, VOIDmode, 0, 1);
9701 /* On x86_64 the lea instruction operates on Pmode, so we need
9702 to get arithmetics done in proper mode to match. */
9704 tmp = copy_rtx (out);
9708 out1 = copy_rtx (out);
9709 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9713 tmp = gen_rtx_PLUS (mode, tmp, out1);
9719 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9722 if (!rtx_equal_p (tmp, out))
9725 out = force_operand (tmp, copy_rtx (out));
9727 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9729 if (!rtx_equal_p (out, operands[0]))
9730 emit_move_insn (operands[0], copy_rtx (out));
9732 return 1; /* DONE */
9736 * General case: Jumpful:
9737 * xorl dest,dest cmpl op1, op2
9738 * cmpl op1, op2 movl ct, dest
9740 * decl dest movl cf, dest
9741 * andl (cf-ct),dest 1:
9746 * This is reasonably steep, but branch mispredict costs are
9747 * high on modern cpus, so consider failing only if optimizing
9751 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9752 && BRANCH_COST >= 2)
9758 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9759 /* We may be reversing unordered compare to normal compare,
9760 that is not valid in general (we may convert non-trapping
9761 condition to trapping one), however on i386 we currently
9762 emit all comparisons unordered. */
9763 code = reverse_condition_maybe_unordered (code);
9766 code = reverse_condition (code);
9767 if (compare_code != NIL)
9768 compare_code = reverse_condition (compare_code);
9772 if (compare_code != NIL)
9774 /* notl op1 (if needed)
9779 For x < 0 (resp. x <= -1) there will be no notl,
9780 so if possible swap the constants to get rid of the
9782 True/false will be -1/0 while code below (store flag
9783 followed by decrement) is 0/-1, so the constants need
9784 to be exchanged once more. */
9786 if (compare_code == GE || !cf)
9788 code = reverse_condition (code);
9793 HOST_WIDE_INT tmp = cf;
9798 out = emit_store_flag (out, code, ix86_compare_op0,
9799 ix86_compare_op1, VOIDmode, 0, -1);
9803 out = emit_store_flag (out, code, ix86_compare_op0,
9804 ix86_compare_op1, VOIDmode, 0, 1);
9806 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9807 copy_rtx (out), 1, OPTAB_DIRECT);
9810 out = expand_simple_binop (mode, AND, copy_rtx (out),
9811 gen_int_mode (cf - ct, mode),
9812 copy_rtx (out), 1, OPTAB_DIRECT);
9814 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9815 copy_rtx (out), 1, OPTAB_DIRECT);
9816 if (!rtx_equal_p (out, operands[0]))
9817 emit_move_insn (operands[0], copy_rtx (out));
9819 return 1; /* DONE */
9823 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9825 /* Try a few things more with specific constants and a variable. */
9828 rtx var, orig_out, out, tmp;
9830 if (BRANCH_COST <= 2)
9831 return 0; /* FAIL */
9833 /* If one of the two operands is an interesting constant, load a
9834 constant with the above and mask it in with a logical operation. */
9836 if (GET_CODE (operands[2]) == CONST_INT)
9839 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9840 operands[3] = constm1_rtx, op = and_optab;
9841 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9842 operands[3] = const0_rtx, op = ior_optab;
9844 return 0; /* FAIL */
9846 else if (GET_CODE (operands[3]) == CONST_INT)
9849 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9850 operands[2] = constm1_rtx, op = and_optab;
9851 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9852 operands[2] = const0_rtx, op = ior_optab;
9854 return 0; /* FAIL */
9857 return 0; /* FAIL */
9859 orig_out = operands[0];
9860 tmp = gen_reg_rtx (mode);
9863 /* Recurse to get the constant loaded. */
9864 if (ix86_expand_int_movcc (operands) == 0)
9865 return 0; /* FAIL */
9867 /* Mask in the interesting variable. */
9868 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9870 if (!rtx_equal_p (out, orig_out))
9871 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9873 return 1; /* DONE */
9877 * For comparison with above,
9887 if (! nonimmediate_operand (operands[2], mode))
9888 operands[2] = force_reg (mode, operands[2]);
9889 if (! nonimmediate_operand (operands[3], mode))
9890 operands[3] = force_reg (mode, operands[3]);
9892 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9894 rtx tmp = gen_reg_rtx (mode);
9895 emit_move_insn (tmp, operands[3]);
9898 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9900 rtx tmp = gen_reg_rtx (mode);
9901 emit_move_insn (tmp, operands[2]);
9905 if (! register_operand (operands[2], VOIDmode)
9907 || ! register_operand (operands[3], VOIDmode)))
9908 operands[2] = force_reg (mode, operands[2]);
9911 && ! register_operand (operands[3], VOIDmode))
9912 operands[3] = force_reg (mode, operands[3]);
9914 emit_insn (compare_seq);
9915 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9916 gen_rtx_IF_THEN_ELSE (mode,
9917 compare_op, operands[2],
9920 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9921 gen_rtx_IF_THEN_ELSE (mode,
9923 copy_rtx (operands[3]),
9924 copy_rtx (operands[0]))));
9926 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9927 gen_rtx_IF_THEN_ELSE (mode,
9929 copy_rtx (operands[2]),
9930 copy_rtx (operands[0]))));
9932 return 1; /* DONE */
9936 ix86_expand_fp_movcc (rtx operands[])
9940 rtx compare_op, second_test, bypass_test;
9942 /* For SF/DFmode conditional moves based on comparisons
9943 in same mode, we may want to use SSE min/max instructions. */
9944 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9945 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9946 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9947 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9949 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9950 /* We may be called from the post-reload splitter. */
9951 && (!REG_P (operands[0])
9952 || SSE_REG_P (operands[0])
9953 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9955 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9956 code = GET_CODE (operands[1]);
9958 /* See if we have (cross) match between comparison operands and
9959 conditional move operands. */
9960 if (rtx_equal_p (operands[2], op1))
9965 code = reverse_condition_maybe_unordered (code);
9967 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9969 /* Check for min operation. */
9970 if (code == LT || code == UNLE)
9978 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9979 if (memory_operand (op0, VOIDmode))
9980 op0 = force_reg (GET_MODE (operands[0]), op0);
9981 if (GET_MODE (operands[0]) == SFmode)
9982 emit_insn (gen_minsf3 (operands[0], op0, op1));
9984 emit_insn (gen_mindf3 (operands[0], op0, op1));
9987 /* Check for max operation. */
9988 if (code == GT || code == UNGE)
9996 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9997 if (memory_operand (op0, VOIDmode))
9998 op0 = force_reg (GET_MODE (operands[0]), op0);
9999 if (GET_MODE (operands[0]) == SFmode)
10000 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10002 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10006 /* Manage condition to be sse_comparison_operator. In case we are
10007 in non-ieee mode, try to canonicalize the destination operand
10008 to be first in the comparison - this helps reload to avoid extra
10010 if (!sse_comparison_operator (operands[1], VOIDmode)
10011 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10013 rtx tmp = ix86_compare_op0;
10014 ix86_compare_op0 = ix86_compare_op1;
10015 ix86_compare_op1 = tmp;
10016 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10017 VOIDmode, ix86_compare_op0,
10020 /* Similarly try to manage result to be first operand of conditional
10021 move. We also don't support the NE comparison on SSE, so try to
10023 if ((rtx_equal_p (operands[0], operands[3])
10024 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10025 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10027 rtx tmp = operands[2];
10028 operands[2] = operands[3];
10030 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10031 (GET_CODE (operands[1])),
10032 VOIDmode, ix86_compare_op0,
10035 if (GET_MODE (operands[0]) == SFmode)
10036 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10037 operands[2], operands[3],
10038 ix86_compare_op0, ix86_compare_op1));
10040 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10041 operands[2], operands[3],
10042 ix86_compare_op0, ix86_compare_op1));
10046 /* The floating point conditional move instructions don't directly
10047 support conditions resulting from a signed integer comparison. */
10049 code = GET_CODE (operands[1]);
10050 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10052 /* The floating point conditional move instructions don't directly
10053 support signed integer comparisons. */
10055 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10057 if (second_test != NULL || bypass_test != NULL)
10059 tmp = gen_reg_rtx (QImode);
10060 ix86_expand_setcc (code, tmp);
10062 ix86_compare_op0 = tmp;
10063 ix86_compare_op1 = const0_rtx;
10064 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10066 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10068 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10069 emit_move_insn (tmp, operands[3]);
10072 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10074 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10075 emit_move_insn (tmp, operands[2]);
10079 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10080 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10085 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10086 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10091 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10092 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10100 /* Expand conditional increment or decrement using adb/sbb instructions.
10101 The default case using setcc followed by the conditional move can be
10102 done by generic code. */
10104 ix86_expand_int_addcc (rtx operands[])
10106 enum rtx_code code = GET_CODE (operands[1]);
10108 rtx val = const0_rtx;
10109 bool fpcmp = false;
10110 enum machine_mode mode = GET_MODE (operands[0]);
10112 if (operands[3] != const1_rtx
10113 && operands[3] != constm1_rtx)
10115 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10116 ix86_compare_op1, &compare_op))
10118 code = GET_CODE (compare_op);
10120 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10121 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10124 code = ix86_fp_compare_code_to_integer (code);
10131 PUT_CODE (compare_op,
10132 reverse_condition_maybe_unordered
10133 (GET_CODE (compare_op)));
10135 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10137 PUT_MODE (compare_op, mode);
10139 /* Construct either adc or sbb insn. */
10140 if ((code == LTU) == (operands[3] == constm1_rtx))
10142 switch (GET_MODE (operands[0]))
10145 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10148 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10151 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10154 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10162 switch (GET_MODE (operands[0]))
10165 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10168 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10171 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10174 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10180 return 1; /* DONE */
10184 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10185 works for floating pointer parameters and nonoffsetable memories.
10186 For pushes, it returns just stack offsets; the values will be saved
10187 in the right order. Maximally three parts are generated. */
10190 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10195 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10197 size = (GET_MODE_SIZE (mode) + 4) / 8;
10199 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10201 if (size < 2 || size > 3)
10204 /* Optimize constant pool reference to immediates. This is used by fp
10205 moves, that force all constants to memory to allow combining. */
10206 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10208 rtx tmp = maybe_get_pool_constant (operand);
10213 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10215 /* The only non-offsetable memories we handle are pushes. */
10216 if (! push_operand (operand, VOIDmode))
10219 operand = copy_rtx (operand);
10220 PUT_MODE (operand, Pmode);
10221 parts[0] = parts[1] = parts[2] = operand;
10223 else if (!TARGET_64BIT)
10225 if (mode == DImode)
10226 split_di (&operand, 1, &parts[0], &parts[1]);
10229 if (REG_P (operand))
10231 if (!reload_completed)
10233 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10234 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10236 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10238 else if (offsettable_memref_p (operand))
10240 operand = adjust_address (operand, SImode, 0);
10241 parts[0] = operand;
10242 parts[1] = adjust_address (operand, SImode, 4);
10244 parts[2] = adjust_address (operand, SImode, 8);
10246 else if (GET_CODE (operand) == CONST_DOUBLE)
10251 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10256 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10257 parts[2] = gen_int_mode (l[2], SImode);
10260 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10265 parts[1] = gen_int_mode (l[1], SImode);
10266 parts[0] = gen_int_mode (l[0], SImode);
10274 if (mode == TImode)
10275 split_ti (&operand, 1, &parts[0], &parts[1]);
10276 if (mode == XFmode || mode == TFmode)
10278 if (REG_P (operand))
10280 if (!reload_completed)
10282 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10283 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10285 else if (offsettable_memref_p (operand))
10287 operand = adjust_address (operand, DImode, 0);
10288 parts[0] = operand;
10289 parts[1] = adjust_address (operand, SImode, 8);
10291 else if (GET_CODE (operand) == CONST_DOUBLE)
10296 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10297 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10298 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10299 if (HOST_BITS_PER_WIDE_INT >= 64)
10302 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10303 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10306 parts[0] = immed_double_const (l[0], l[1], DImode);
10307 parts[1] = gen_int_mode (l[2], SImode);
10317 /* Emit insns to perform a move or push of DI, DF, and XF values.
10318 Return false when normal moves are needed; true when all required
10319 insns have been emitted. Operands 2-4 contain the input values
10320 int the correct order; operands 5-7 contain the output values. */
10323 ix86_split_long_move (rtx operands[])
10328 int collisions = 0;
10329 enum machine_mode mode = GET_MODE (operands[0]);
10331 /* The DFmode expanders may ask us to move double.
10332 For 64bit target this is single move. By hiding the fact
10333 here we simplify i386.md splitters. */
10334 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10336 /* Optimize constant pool reference to immediates. This is used by
10337 fp moves, that force all constants to memory to allow combining. */
10339 if (GET_CODE (operands[1]) == MEM
10340 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10341 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10342 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10343 if (push_operand (operands[0], VOIDmode))
10345 operands[0] = copy_rtx (operands[0]);
10346 PUT_MODE (operands[0], Pmode);
10349 operands[0] = gen_lowpart (DImode, operands[0]);
10350 operands[1] = gen_lowpart (DImode, operands[1]);
10351 emit_move_insn (operands[0], operands[1]);
10355 /* The only non-offsettable memory we handle is push. */
10356 if (push_operand (operands[0], VOIDmode))
10358 else if (GET_CODE (operands[0]) == MEM
10359 && ! offsettable_memref_p (operands[0]))
10362 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10363 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10365 /* When emitting push, take care for source operands on the stack. */
10366 if (push && GET_CODE (operands[1]) == MEM
10367 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10370 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10371 XEXP (part[1][2], 0));
10372 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10373 XEXP (part[1][1], 0));
10376 /* We need to do copy in the right order in case an address register
10377 of the source overlaps the destination. */
10378 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10380 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10382 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10385 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10388 /* Collision in the middle part can be handled by reordering. */
10389 if (collisions == 1 && nparts == 3
10390 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10393 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10394 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10397 /* If there are more collisions, we can't handle it by reordering.
10398 Do an lea to the last part and use only one colliding move. */
10399 else if (collisions > 1)
10405 base = part[0][nparts - 1];
10407 /* Handle the case when the last part isn't valid for lea.
10408 Happens in 64-bit mode storing the 12-byte XFmode. */
10409 if (GET_MODE (base) != Pmode)
10410 base = gen_rtx_REG (Pmode, REGNO (base));
10412 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10413 part[1][0] = replace_equiv_address (part[1][0], base);
10414 part[1][1] = replace_equiv_address (part[1][1],
10415 plus_constant (base, UNITS_PER_WORD));
10417 part[1][2] = replace_equiv_address (part[1][2],
10418 plus_constant (base, 8));
10428 /* We use only first 12 bytes of TFmode value, but for pushing we
10429 are required to adjust stack as if we were pushing real 16byte
10431 if (mode == TFmode && !TARGET_64BIT)
10432 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10434 emit_move_insn (part[0][2], part[1][2]);
10439 /* In 64bit mode we don't have 32bit push available. In case this is
10440 register, it is OK - we will just use larger counterpart. We also
10441 retype memory - these comes from attempt to avoid REX prefix on
10442 moving of second half of TFmode value. */
10443 if (GET_MODE (part[1][1]) == SImode)
10445 if (GET_CODE (part[1][1]) == MEM)
10446 part[1][1] = adjust_address (part[1][1], DImode, 0);
10447 else if (REG_P (part[1][1]))
10448 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10451 if (GET_MODE (part[1][0]) == SImode)
10452 part[1][0] = part[1][1];
10455 emit_move_insn (part[0][1], part[1][1]);
10456 emit_move_insn (part[0][0], part[1][0]);
10460 /* Choose correct order to not overwrite the source before it is copied. */
10461 if ((REG_P (part[0][0])
10462 && REG_P (part[1][1])
10463 && (REGNO (part[0][0]) == REGNO (part[1][1])
10465 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10467 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10471 operands[2] = part[0][2];
10472 operands[3] = part[0][1];
10473 operands[4] = part[0][0];
10474 operands[5] = part[1][2];
10475 operands[6] = part[1][1];
10476 operands[7] = part[1][0];
10480 operands[2] = part[0][1];
10481 operands[3] = part[0][0];
10482 operands[5] = part[1][1];
10483 operands[6] = part[1][0];
10490 operands[2] = part[0][0];
10491 operands[3] = part[0][1];
10492 operands[4] = part[0][2];
10493 operands[5] = part[1][0];
10494 operands[6] = part[1][1];
10495 operands[7] = part[1][2];
10499 operands[2] = part[0][0];
10500 operands[3] = part[0][1];
10501 operands[5] = part[1][0];
10502 operands[6] = part[1][1];
10505 emit_move_insn (operands[2], operands[5]);
10506 emit_move_insn (operands[3], operands[6]);
10508 emit_move_insn (operands[4], operands[7]);
10514 ix86_split_ashldi (rtx *operands, rtx scratch)
10516 rtx low[2], high[2];
10519 if (GET_CODE (operands[2]) == CONST_INT)
10521 split_di (operands, 2, low, high);
10522 count = INTVAL (operands[2]) & 63;
10526 emit_move_insn (high[0], low[1]);
10527 emit_move_insn (low[0], const0_rtx);
10530 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10534 if (!rtx_equal_p (operands[0], operands[1]))
10535 emit_move_insn (operands[0], operands[1]);
10536 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10537 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10542 if (!rtx_equal_p (operands[0], operands[1]))
10543 emit_move_insn (operands[0], operands[1]);
10545 split_di (operands, 1, low, high);
10547 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10548 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10550 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10552 if (! no_new_pseudos)
10553 scratch = force_reg (SImode, const0_rtx);
10555 emit_move_insn (scratch, const0_rtx);
10557 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10561 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10566 ix86_split_ashrdi (rtx *operands, rtx scratch)
10568 rtx low[2], high[2];
10571 if (GET_CODE (operands[2]) == CONST_INT)
10573 split_di (operands, 2, low, high);
10574 count = INTVAL (operands[2]) & 63;
10578 emit_move_insn (low[0], high[1]);
10580 if (! reload_completed)
10581 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10584 emit_move_insn (high[0], low[0]);
10585 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10589 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10593 if (!rtx_equal_p (operands[0], operands[1]))
10594 emit_move_insn (operands[0], operands[1]);
10595 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10596 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10601 if (!rtx_equal_p (operands[0], operands[1]))
10602 emit_move_insn (operands[0], operands[1]);
10604 split_di (operands, 1, low, high);
10606 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10607 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10609 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10611 if (! no_new_pseudos)
10612 scratch = gen_reg_rtx (SImode);
10613 emit_move_insn (scratch, high[0]);
10614 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10615 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10619 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10624 ix86_split_lshrdi (rtx *operands, rtx scratch)
10626 rtx low[2], high[2];
10629 if (GET_CODE (operands[2]) == CONST_INT)
10631 split_di (operands, 2, low, high);
10632 count = INTVAL (operands[2]) & 63;
10636 emit_move_insn (low[0], high[1]);
10637 emit_move_insn (high[0], const0_rtx);
10640 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10644 if (!rtx_equal_p (operands[0], operands[1]))
10645 emit_move_insn (operands[0], operands[1]);
10646 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10647 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10652 if (!rtx_equal_p (operands[0], operands[1]))
10653 emit_move_insn (operands[0], operands[1]);
10655 split_di (operands, 1, low, high);
10657 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10658 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10660 /* Heh. By reversing the arguments, we can reuse this pattern. */
10661 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10663 if (! no_new_pseudos)
10664 scratch = force_reg (SImode, const0_rtx);
10666 emit_move_insn (scratch, const0_rtx);
10668 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10672 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10676 /* Helper function for the string operations below. Dest VARIABLE whether
10677 it is aligned to VALUE bytes. If true, jump to the label. */
10679 ix86_expand_aligntest (rtx variable, int value)
10681 rtx label = gen_label_rtx ();
10682 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10683 if (GET_MODE (variable) == DImode)
10684 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10686 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10687 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10692 /* Adjust COUNTER by the VALUE. */
10694 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10696 if (GET_MODE (countreg) == DImode)
10697 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10699 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10702 /* Zero extend possibly SImode EXP to Pmode register. */
10704 ix86_zero_extend_to_Pmode (rtx exp)
10707 if (GET_MODE (exp) == VOIDmode)
10708 return force_reg (Pmode, exp);
10709 if (GET_MODE (exp) == Pmode)
10710 return copy_to_mode_reg (Pmode, exp);
10711 r = gen_reg_rtx (Pmode);
10712 emit_insn (gen_zero_extendsidi2 (r, exp));
10716 /* Expand string move (memcpy) operation. Use i386 string operations when
10717 profitable. expand_clrstr contains similar code. */
10719 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10721 rtx srcreg, destreg, countreg;
10722 enum machine_mode counter_mode;
10723 HOST_WIDE_INT align = 0;
10724 unsigned HOST_WIDE_INT count = 0;
10727 if (GET_CODE (align_exp) == CONST_INT)
10728 align = INTVAL (align_exp);
10730 /* Can't use any of this if the user has appropriated esi or edi. */
10731 if (global_regs[4] || global_regs[5])
10734 /* This simple hack avoids all inlining code and simplifies code below. */
10735 if (!TARGET_ALIGN_STRINGOPS)
10738 if (GET_CODE (count_exp) == CONST_INT)
10740 count = INTVAL (count_exp);
10741 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10745 /* Figure out proper mode for counter. For 32bits it is always SImode,
10746 for 64bits use SImode when possible, otherwise DImode.
10747 Set count to number of bytes copied when known at compile time. */
10748 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10749 || x86_64_zero_extended_value (count_exp))
10750 counter_mode = SImode;
10752 counter_mode = DImode;
10756 if (counter_mode != SImode && counter_mode != DImode)
10759 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10760 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10762 emit_insn (gen_cld ());
10764 /* When optimizing for size emit simple rep ; movsb instruction for
10765 counts not divisible by 4. */
10767 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10769 countreg = ix86_zero_extend_to_Pmode (count_exp);
10771 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10772 destreg, srcreg, countreg));
10774 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10775 destreg, srcreg, countreg));
10778 /* For constant aligned (or small unaligned) copies use rep movsl
10779 followed by code copying the rest. For PentiumPro ensure 8 byte
10780 alignment to allow rep movsl acceleration. */
10782 else if (count != 0
10784 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10785 || optimize_size || count < (unsigned int) 64))
10787 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10788 if (count & ~(size - 1))
10790 countreg = copy_to_mode_reg (counter_mode,
10791 GEN_INT ((count >> (size == 4 ? 2 : 3))
10792 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10793 countreg = ix86_zero_extend_to_Pmode (countreg);
10797 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10798 destreg, srcreg, countreg));
10800 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10801 destreg, srcreg, countreg));
10804 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10805 destreg, srcreg, countreg));
10807 if (size == 8 && (count & 0x04))
10808 emit_insn (gen_strmovsi (destreg, srcreg));
10810 emit_insn (gen_strmovhi (destreg, srcreg));
10812 emit_insn (gen_strmovqi (destreg, srcreg));
10814 /* The generic code based on the glibc implementation:
10815 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10816 allowing accelerated copying there)
10817 - copy the data using rep movsl
10818 - copy the rest. */
10823 int desired_alignment = (TARGET_PENTIUMPRO
10824 && (count == 0 || count >= (unsigned int) 260)
10825 ? 8 : UNITS_PER_WORD);
10827 /* In case we don't know anything about the alignment, default to
10828 library version, since it is usually equally fast and result in
10831 Also emit call when we know that the count is large and call overhead
10832 will not be important. */
10833 if (!TARGET_INLINE_ALL_STRINGOPS
10834 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10840 if (TARGET_SINGLE_STRINGOP)
10841 emit_insn (gen_cld ());
10843 countreg2 = gen_reg_rtx (Pmode);
10844 countreg = copy_to_mode_reg (counter_mode, count_exp);
10846 /* We don't use loops to align destination and to copy parts smaller
10847 than 4 bytes, because gcc is able to optimize such code better (in
10848 the case the destination or the count really is aligned, gcc is often
10849 able to predict the branches) and also it is friendlier to the
10850 hardware branch prediction.
10852 Using loops is beneficial for generic case, because we can
10853 handle small counts using the loops. Many CPUs (such as Athlon)
10854 have large REP prefix setup costs.
10856 This is quite costly. Maybe we can revisit this decision later or
10857 add some customizability to this code. */
10859 if (count == 0 && align < desired_alignment)
10861 label = gen_label_rtx ();
10862 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10863 LEU, 0, counter_mode, 1, label);
10867 rtx label = ix86_expand_aligntest (destreg, 1);
10868 emit_insn (gen_strmovqi (destreg, srcreg));
10869 ix86_adjust_counter (countreg, 1);
10870 emit_label (label);
10871 LABEL_NUSES (label) = 1;
10875 rtx label = ix86_expand_aligntest (destreg, 2);
10876 emit_insn (gen_strmovhi (destreg, srcreg));
10877 ix86_adjust_counter (countreg, 2);
10878 emit_label (label);
10879 LABEL_NUSES (label) = 1;
10881 if (align <= 4 && desired_alignment > 4)
10883 rtx label = ix86_expand_aligntest (destreg, 4);
10884 emit_insn (gen_strmovsi (destreg, srcreg));
10885 ix86_adjust_counter (countreg, 4);
10886 emit_label (label);
10887 LABEL_NUSES (label) = 1;
10890 if (label && desired_alignment > 4 && !TARGET_64BIT)
10892 emit_label (label);
10893 LABEL_NUSES (label) = 1;
10896 if (!TARGET_SINGLE_STRINGOP)
10897 emit_insn (gen_cld ());
10900 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10902 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10903 destreg, srcreg, countreg2));
10907 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10908 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10909 destreg, srcreg, countreg2));
10914 emit_label (label);
10915 LABEL_NUSES (label) = 1;
10917 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10918 emit_insn (gen_strmovsi (destreg, srcreg));
10919 if ((align <= 4 || count == 0) && TARGET_64BIT)
10921 rtx label = ix86_expand_aligntest (countreg, 4);
10922 emit_insn (gen_strmovsi (destreg, srcreg));
10923 emit_label (label);
10924 LABEL_NUSES (label) = 1;
10926 if (align > 2 && count != 0 && (count & 2))
10927 emit_insn (gen_strmovhi (destreg, srcreg));
10928 if (align <= 2 || count == 0)
10930 rtx label = ix86_expand_aligntest (countreg, 2);
10931 emit_insn (gen_strmovhi (destreg, srcreg));
10932 emit_label (label);
10933 LABEL_NUSES (label) = 1;
10935 if (align > 1 && count != 0 && (count & 1))
10936 emit_insn (gen_strmovqi (destreg, srcreg));
10937 if (align <= 1 || count == 0)
10939 rtx label = ix86_expand_aligntest (countreg, 1);
10940 emit_insn (gen_strmovqi (destreg, srcreg));
10941 emit_label (label);
10942 LABEL_NUSES (label) = 1;
10946 insns = get_insns ();
10949 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10954 /* Expand string clear operation (bzero). Use i386 string operations when
10955 profitable. expand_movstr contains similar code. */
10957 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
10959 rtx destreg, zeroreg, countreg;
10960 enum machine_mode counter_mode;
10961 HOST_WIDE_INT align = 0;
10962 unsigned HOST_WIDE_INT count = 0;
10964 if (GET_CODE (align_exp) == CONST_INT)
10965 align = INTVAL (align_exp);
10967 /* Can't use any of this if the user has appropriated esi. */
10968 if (global_regs[4])
10971 /* This simple hack avoids all inlining code and simplifies code below. */
10972 if (!TARGET_ALIGN_STRINGOPS)
10975 if (GET_CODE (count_exp) == CONST_INT)
10977 count = INTVAL (count_exp);
10978 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10981 /* Figure out proper mode for counter. For 32bits it is always SImode,
10982 for 64bits use SImode when possible, otherwise DImode.
10983 Set count to number of bytes copied when known at compile time. */
10984 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10985 || x86_64_zero_extended_value (count_exp))
10986 counter_mode = SImode;
10988 counter_mode = DImode;
10990 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10992 emit_insn (gen_cld ());
10994 /* When optimizing for size emit simple rep ; movsb instruction for
10995 counts not divisible by 4. */
10997 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10999 countreg = ix86_zero_extend_to_Pmode (count_exp);
11000 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11002 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11003 destreg, countreg));
11005 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11006 destreg, countreg));
11008 else if (count != 0
11010 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11011 || optimize_size || count < (unsigned int) 64))
11013 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11014 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11015 if (count & ~(size - 1))
11017 countreg = copy_to_mode_reg (counter_mode,
11018 GEN_INT ((count >> (size == 4 ? 2 : 3))
11019 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11020 countreg = ix86_zero_extend_to_Pmode (countreg);
11024 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11025 destreg, countreg));
11027 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11028 destreg, countreg));
11031 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11032 destreg, countreg));
11034 if (size == 8 && (count & 0x04))
11035 emit_insn (gen_strsetsi (destreg,
11036 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11038 emit_insn (gen_strsethi (destreg,
11039 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11041 emit_insn (gen_strsetqi (destreg,
11042 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11048 /* Compute desired alignment of the string operation. */
11049 int desired_alignment = (TARGET_PENTIUMPRO
11050 && (count == 0 || count >= (unsigned int) 260)
11051 ? 8 : UNITS_PER_WORD);
11053 /* In case we don't know anything about the alignment, default to
11054 library version, since it is usually equally fast and result in
11057 Also emit call when we know that the count is large and call overhead
11058 will not be important. */
11059 if (!TARGET_INLINE_ALL_STRINGOPS
11060 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11063 if (TARGET_SINGLE_STRINGOP)
11064 emit_insn (gen_cld ());
11066 countreg2 = gen_reg_rtx (Pmode);
11067 countreg = copy_to_mode_reg (counter_mode, count_exp);
11068 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11070 if (count == 0 && align < desired_alignment)
11072 label = gen_label_rtx ();
11073 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11074 LEU, 0, counter_mode, 1, label);
11078 rtx label = ix86_expand_aligntest (destreg, 1);
11079 emit_insn (gen_strsetqi (destreg,
11080 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11081 ix86_adjust_counter (countreg, 1);
11082 emit_label (label);
11083 LABEL_NUSES (label) = 1;
11087 rtx label = ix86_expand_aligntest (destreg, 2);
11088 emit_insn (gen_strsethi (destreg,
11089 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11090 ix86_adjust_counter (countreg, 2);
11091 emit_label (label);
11092 LABEL_NUSES (label) = 1;
11094 if (align <= 4 && desired_alignment > 4)
11096 rtx label = ix86_expand_aligntest (destreg, 4);
11097 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11098 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11100 ix86_adjust_counter (countreg, 4);
11101 emit_label (label);
11102 LABEL_NUSES (label) = 1;
11105 if (label && desired_alignment > 4 && !TARGET_64BIT)
11107 emit_label (label);
11108 LABEL_NUSES (label) = 1;
11112 if (!TARGET_SINGLE_STRINGOP)
11113 emit_insn (gen_cld ());
11116 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11118 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11119 destreg, countreg2));
11123 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11124 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11125 destreg, countreg2));
11129 emit_label (label);
11130 LABEL_NUSES (label) = 1;
11133 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11134 emit_insn (gen_strsetsi (destreg,
11135 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11136 if (TARGET_64BIT && (align <= 4 || count == 0))
11138 rtx label = ix86_expand_aligntest (countreg, 4);
11139 emit_insn (gen_strsetsi (destreg,
11140 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11141 emit_label (label);
11142 LABEL_NUSES (label) = 1;
11144 if (align > 2 && count != 0 && (count & 2))
11145 emit_insn (gen_strsethi (destreg,
11146 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11147 if (align <= 2 || count == 0)
11149 rtx label = ix86_expand_aligntest (countreg, 2);
11150 emit_insn (gen_strsethi (destreg,
11151 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11152 emit_label (label);
11153 LABEL_NUSES (label) = 1;
11155 if (align > 1 && count != 0 && (count & 1))
11156 emit_insn (gen_strsetqi (destreg,
11157 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11158 if (align <= 1 || count == 0)
11160 rtx label = ix86_expand_aligntest (countreg, 1);
11161 emit_insn (gen_strsetqi (destreg,
11162 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11163 emit_label (label);
11164 LABEL_NUSES (label) = 1;
11169 /* Expand strlen. */
11171 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11173 rtx addr, scratch1, scratch2, scratch3, scratch4;
11175 /* The generic case of strlen expander is long. Avoid it's
11176 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11178 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11179 && !TARGET_INLINE_ALL_STRINGOPS
11181 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11184 addr = force_reg (Pmode, XEXP (src, 0));
11185 scratch1 = gen_reg_rtx (Pmode);
11187 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11190 /* Well it seems that some optimizer does not combine a call like
11191 foo(strlen(bar), strlen(bar));
11192 when the move and the subtraction is done here. It does calculate
11193 the length just once when these instructions are done inside of
11194 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11195 often used and I use one fewer register for the lifetime of
11196 output_strlen_unroll() this is better. */
11198 emit_move_insn (out, addr);
11200 ix86_expand_strlensi_unroll_1 (out, align);
11202 /* strlensi_unroll_1 returns the address of the zero at the end of
11203 the string, like memchr(), so compute the length by subtracting
11204 the start address. */
11206 emit_insn (gen_subdi3 (out, out, addr));
11208 emit_insn (gen_subsi3 (out, out, addr));
11212 scratch2 = gen_reg_rtx (Pmode);
11213 scratch3 = gen_reg_rtx (Pmode);
11214 scratch4 = force_reg (Pmode, constm1_rtx);
11216 emit_move_insn (scratch3, addr);
11217 eoschar = force_reg (QImode, eoschar);
11219 emit_insn (gen_cld ());
11222 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11223 align, scratch4, scratch3));
11224 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11225 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11229 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11230 align, scratch4, scratch3));
11231 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11232 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11238 /* Expand the appropriate insns for doing strlen if not just doing
11241 out = result, initialized with the start address
11242 align_rtx = alignment of the address.
11243 scratch = scratch register, initialized with the startaddress when
11244 not aligned, otherwise undefined
11246 This is just the body. It needs the initializations mentioned above and
11247 some address computing at the end. These things are done in i386.md. */
11250 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11254 rtx align_2_label = NULL_RTX;
11255 rtx align_3_label = NULL_RTX;
11256 rtx align_4_label = gen_label_rtx ();
11257 rtx end_0_label = gen_label_rtx ();
11259 rtx tmpreg = gen_reg_rtx (SImode);
11260 rtx scratch = gen_reg_rtx (SImode);
11264 if (GET_CODE (align_rtx) == CONST_INT)
11265 align = INTVAL (align_rtx);
11267 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11269 /* Is there a known alignment and is it less than 4? */
11272 rtx scratch1 = gen_reg_rtx (Pmode);
11273 emit_move_insn (scratch1, out);
11274 /* Is there a known alignment and is it not 2? */
11277 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11278 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11280 /* Leave just the 3 lower bits. */
11281 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11282 NULL_RTX, 0, OPTAB_WIDEN);
11284 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11285 Pmode, 1, align_4_label);
11286 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11287 Pmode, 1, align_2_label);
11288 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11289 Pmode, 1, align_3_label);
11293 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11294 check if is aligned to 4 - byte. */
11296 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11297 NULL_RTX, 0, OPTAB_WIDEN);
11299 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11300 Pmode, 1, align_4_label);
11303 mem = gen_rtx_MEM (QImode, out);
11305 /* Now compare the bytes. */
11307 /* Compare the first n unaligned byte on a byte per byte basis. */
11308 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11309 QImode, 1, end_0_label);
11311 /* Increment the address. */
11313 emit_insn (gen_adddi3 (out, out, const1_rtx));
11315 emit_insn (gen_addsi3 (out, out, const1_rtx));
11317 /* Not needed with an alignment of 2 */
11320 emit_label (align_2_label);
11322 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11326 emit_insn (gen_adddi3 (out, out, const1_rtx));
11328 emit_insn (gen_addsi3 (out, out, const1_rtx));
11330 emit_label (align_3_label);
11333 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11337 emit_insn (gen_adddi3 (out, out, const1_rtx));
11339 emit_insn (gen_addsi3 (out, out, const1_rtx));
11342 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11343 align this loop. It gives only huge programs, but does not help to
11345 emit_label (align_4_label);
11347 mem = gen_rtx_MEM (SImode, out);
11348 emit_move_insn (scratch, mem);
11350 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11352 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11354 /* This formula yields a nonzero result iff one of the bytes is zero.
11355 This saves three branches inside loop and many cycles. */
11357 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11358 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11359 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11360 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11361 gen_int_mode (0x80808080, SImode)));
11362 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11367 rtx reg = gen_reg_rtx (SImode);
11368 rtx reg2 = gen_reg_rtx (Pmode);
11369 emit_move_insn (reg, tmpreg);
11370 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11372 /* If zero is not in the first two bytes, move two bytes forward. */
11373 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11374 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11375 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11376 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11377 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11380 /* Emit lea manually to avoid clobbering of flags. */
11381 emit_insn (gen_rtx_SET (SImode, reg2,
11382 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11384 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11385 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11386 emit_insn (gen_rtx_SET (VOIDmode, out,
11387 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11394 rtx end_2_label = gen_label_rtx ();
11395 /* Is zero in the first two bytes? */
11397 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11398 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11399 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11400 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11401 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11403 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11404 JUMP_LABEL (tmp) = end_2_label;
11406 /* Not in the first two. Move two bytes forward. */
11407 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11409 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11411 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11413 emit_label (end_2_label);
11417 /* Avoid branch in fixing the byte. */
11418 tmpreg = gen_lowpart (QImode, tmpreg);
11419 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11420 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11422 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11424 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11426 emit_label (end_0_label);
11430 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
11431 rtx pop, int sibcall)
11433 rtx use = NULL, call;
11435 if (pop == const0_rtx)
11437 if (TARGET_64BIT && pop)
11441 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11442 fnaddr = machopic_indirect_call_target (fnaddr);
11444 /* Static functions and indirect calls don't need the pic register. */
11445 if (! TARGET_64BIT && flag_pic
11446 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11447 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11448 use_reg (&use, pic_offset_table_rtx);
11450 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11452 rtx al = gen_rtx_REG (QImode, 0);
11453 emit_move_insn (al, callarg2);
11454 use_reg (&use, al);
11456 #endif /* TARGET_MACHO */
11458 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11460 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11461 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11463 if (sibcall && TARGET_64BIT
11464 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11467 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11468 fnaddr = gen_rtx_REG (Pmode, 40);
11469 emit_move_insn (fnaddr, addr);
11470 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11473 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11475 call = gen_rtx_SET (VOIDmode, retval, call);
11478 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11479 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11480 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11483 call = emit_call_insn (call);
11485 CALL_INSN_FUNCTION_USAGE (call) = use;
11489 /* Clear stack slot assignments remembered from previous functions.
11490 This is called from INIT_EXPANDERS once before RTL is emitted for each
11493 static struct machine_function *
11494 ix86_init_machine_status (void)
11496 struct machine_function *f;
11498 f = ggc_alloc_cleared (sizeof (struct machine_function));
11499 f->use_fast_prologue_epilogue_nregs = -1;
11504 /* Return a MEM corresponding to a stack slot with mode MODE.
11505 Allocate a new slot if necessary.
11507 The RTL for a function can have several slots available: N is
11508 which slot to use. */
11511 assign_386_stack_local (enum machine_mode mode, int n)
11513 struct stack_local_entry *s;
11515 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11518 for (s = ix86_stack_locals; s; s = s->next)
11519 if (s->mode == mode && s->n == n)
11522 s = (struct stack_local_entry *)
11523 ggc_alloc (sizeof (struct stack_local_entry));
11526 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11528 s->next = ix86_stack_locals;
11529 ix86_stack_locals = s;
11533 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11535 static GTY(()) rtx ix86_tls_symbol;
11537 ix86_tls_get_addr (void)
11540 if (!ix86_tls_symbol)
11542 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11543 (TARGET_GNU_TLS && !TARGET_64BIT)
11544 ? "___tls_get_addr"
11545 : "__tls_get_addr");
11548 return ix86_tls_symbol;
11551 /* Calculate the length of the memory address in the instruction
11552 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11555 memory_address_length (rtx addr)
11557 struct ix86_address parts;
11558 rtx base, index, disp;
11561 if (GET_CODE (addr) == PRE_DEC
11562 || GET_CODE (addr) == POST_INC
11563 || GET_CODE (addr) == PRE_MODIFY
11564 || GET_CODE (addr) == POST_MODIFY)
11567 if (! ix86_decompose_address (addr, &parts))
11571 index = parts.index;
11575 /* Register Indirect. */
11576 if (base && !index && !disp)
11578 /* Special cases: ebp and esp need the two-byte modrm form. */
11579 if (addr == stack_pointer_rtx
11580 || addr == arg_pointer_rtx
11581 || addr == frame_pointer_rtx
11582 || addr == hard_frame_pointer_rtx)
11586 /* Direct Addressing. */
11587 else if (disp && !base && !index)
11592 /* Find the length of the displacement constant. */
11595 if (GET_CODE (disp) == CONST_INT
11596 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11603 /* An index requires the two-byte modrm form. */
11611 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11612 is set, expect that insn have 8bit immediate alternative. */
11614 ix86_attr_length_immediate_default (rtx insn, int shortform)
11618 extract_insn_cached (insn);
11619 for (i = recog_data.n_operands - 1; i >= 0; --i)
11620 if (CONSTANT_P (recog_data.operand[i]))
11625 && GET_CODE (recog_data.operand[i]) == CONST_INT
11626 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11630 switch (get_attr_mode (insn))
11641 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11646 fatal_insn ("unknown insn mode", insn);
11652 /* Compute default value for "length_address" attribute. */
11654 ix86_attr_length_address_default (rtx insn)
11658 if (get_attr_type (insn) == TYPE_LEA)
11660 rtx set = PATTERN (insn);
11661 if (GET_CODE (set) == SET)
11663 else if (GET_CODE (set) == PARALLEL
11664 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11665 set = XVECEXP (set, 0, 0);
11668 #ifdef ENABLE_CHECKING
11674 return memory_address_length (SET_SRC (set));
11677 extract_insn_cached (insn);
11678 for (i = recog_data.n_operands - 1; i >= 0; --i)
11679 if (GET_CODE (recog_data.operand[i]) == MEM)
11681 return memory_address_length (XEXP (recog_data.operand[i], 0));
11687 /* Return the maximum number of instructions a cpu can issue. */
11690 ix86_issue_rate (void)
11694 case PROCESSOR_PENTIUM:
11698 case PROCESSOR_PENTIUMPRO:
11699 case PROCESSOR_PENTIUM4:
11700 case PROCESSOR_ATHLON:
11709 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11710 by DEP_INSN and nothing set by DEP_INSN. */
11713 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11717 /* Simplify the test for uninteresting insns. */
11718 if (insn_type != TYPE_SETCC
11719 && insn_type != TYPE_ICMOV
11720 && insn_type != TYPE_FCMOV
11721 && insn_type != TYPE_IBR)
11724 if ((set = single_set (dep_insn)) != 0)
11726 set = SET_DEST (set);
11729 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11730 && XVECLEN (PATTERN (dep_insn), 0) == 2
11731 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11732 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11734 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11735 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11740 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11743 /* This test is true if the dependent insn reads the flags but
11744 not any other potentially set register. */
11745 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11748 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11754 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11755 address with operands set by DEP_INSN. */
11758 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11762 if (insn_type == TYPE_LEA
11765 addr = PATTERN (insn);
11766 if (GET_CODE (addr) == SET)
11768 else if (GET_CODE (addr) == PARALLEL
11769 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11770 addr = XVECEXP (addr, 0, 0);
11773 addr = SET_SRC (addr);
11778 extract_insn_cached (insn);
11779 for (i = recog_data.n_operands - 1; i >= 0; --i)
11780 if (GET_CODE (recog_data.operand[i]) == MEM)
11782 addr = XEXP (recog_data.operand[i], 0);
11789 return modified_in_p (addr, dep_insn);
11793 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11795 enum attr_type insn_type, dep_insn_type;
11796 enum attr_memory memory, dep_memory;
11798 int dep_insn_code_number;
11800 /* Anti and output dependencies have zero cost on all CPUs. */
11801 if (REG_NOTE_KIND (link) != 0)
11804 dep_insn_code_number = recog_memoized (dep_insn);
11806 /* If we can't recognize the insns, we can't really do anything. */
11807 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11810 insn_type = get_attr_type (insn);
11811 dep_insn_type = get_attr_type (dep_insn);
11815 case PROCESSOR_PENTIUM:
11816 /* Address Generation Interlock adds a cycle of latency. */
11817 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11820 /* ??? Compares pair with jump/setcc. */
11821 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11824 /* Floating point stores require value to be ready one cycle earlier. */
11825 if (insn_type == TYPE_FMOV
11826 && get_attr_memory (insn) == MEMORY_STORE
11827 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11831 case PROCESSOR_PENTIUMPRO:
11832 memory = get_attr_memory (insn);
11833 dep_memory = get_attr_memory (dep_insn);
11835 /* Since we can't represent delayed latencies of load+operation,
11836 increase the cost here for non-imov insns. */
11837 if (dep_insn_type != TYPE_IMOV
11838 && dep_insn_type != TYPE_FMOV
11839 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11842 /* INT->FP conversion is expensive. */
11843 if (get_attr_fp_int_src (dep_insn))
11846 /* There is one cycle extra latency between an FP op and a store. */
11847 if (insn_type == TYPE_FMOV
11848 && (set = single_set (dep_insn)) != NULL_RTX
11849 && (set2 = single_set (insn)) != NULL_RTX
11850 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11851 && GET_CODE (SET_DEST (set2)) == MEM)
11854 /* Show ability of reorder buffer to hide latency of load by executing
11855 in parallel with previous instruction in case
11856 previous instruction is not needed to compute the address. */
11857 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11858 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11860 /* Claim moves to take one cycle, as core can issue one load
11861 at time and the next load can start cycle later. */
11862 if (dep_insn_type == TYPE_IMOV
11863 || dep_insn_type == TYPE_FMOV)
11871 memory = get_attr_memory (insn);
11872 dep_memory = get_attr_memory (dep_insn);
11873 /* The esp dependency is resolved before the instruction is really
11875 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11876 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11879 /* Since we can't represent delayed latencies of load+operation,
11880 increase the cost here for non-imov insns. */
11881 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11882 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11884 /* INT->FP conversion is expensive. */
11885 if (get_attr_fp_int_src (dep_insn))
11888 /* Show ability of reorder buffer to hide latency of load by executing
11889 in parallel with previous instruction in case
11890 previous instruction is not needed to compute the address. */
11891 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11892 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11894 /* Claim moves to take one cycle, as core can issue one load
11895 at time and the next load can start cycle later. */
11896 if (dep_insn_type == TYPE_IMOV
11897 || dep_insn_type == TYPE_FMOV)
11906 case PROCESSOR_ATHLON:
11908 memory = get_attr_memory (insn);
11909 dep_memory = get_attr_memory (dep_insn);
11911 /* Show ability of reorder buffer to hide latency of load by executing
11912 in parallel with previous instruction in case
11913 previous instruction is not needed to compute the address. */
11914 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11915 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11917 enum attr_unit unit = get_attr_unit (insn);
11920 /* Because of the difference between the length of integer and
11921 floating unit pipeline preparation stages, the memory operands
11922 for floating point are cheaper.
11924 ??? For Athlon it the difference is most probably 2. */
11925 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11928 loadcost = TARGET_ATHLON ? 2 : 0;
11930 if (cost >= loadcost)
11945 struct ppro_sched_data
11948 int issued_this_cycle;
11952 static enum attr_ppro_uops
11953 ix86_safe_ppro_uops (rtx insn)
11955 if (recog_memoized (insn) >= 0)
11956 return get_attr_ppro_uops (insn);
11958 return PPRO_UOPS_MANY;
11962 ix86_dump_ppro_packet (FILE *dump)
11964 if (ix86_sched_data.ppro.decode[0])
11966 fprintf (dump, "PPRO packet: %d",
11967 INSN_UID (ix86_sched_data.ppro.decode[0]));
11968 if (ix86_sched_data.ppro.decode[1])
11969 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11970 if (ix86_sched_data.ppro.decode[2])
11971 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11972 fputc ('\n', dump);
11976 /* We're beginning a new block. Initialize data structures as necessary. */
11979 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
11980 int sched_verbose ATTRIBUTE_UNUSED,
11981 int veclen ATTRIBUTE_UNUSED)
11983 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11986 /* Shift INSN to SLOT, and shift everything else down. */
11989 ix86_reorder_insn (rtx *insnp, rtx *slot)
11995 insnp[0] = insnp[1];
11996 while (++insnp != slot);
12002 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12005 enum attr_ppro_uops cur_uops;
12006 int issued_this_cycle;
12010 /* At this point .ppro.decode contains the state of the three
12011 decoders from last "cycle". That is, those insns that were
12012 actually independent. But here we're scheduling for the
12013 decoder, and we may find things that are decodable in the
12016 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12017 issued_this_cycle = 0;
12020 cur_uops = ix86_safe_ppro_uops (*insnp);
12022 /* If the decoders are empty, and we've a complex insn at the
12023 head of the priority queue, let it issue without complaint. */
12024 if (decode[0] == NULL)
12026 if (cur_uops == PPRO_UOPS_MANY)
12028 decode[0] = *insnp;
12032 /* Otherwise, search for a 2-4 uop unsn to issue. */
12033 while (cur_uops != PPRO_UOPS_FEW)
12035 if (insnp == ready)
12037 cur_uops = ix86_safe_ppro_uops (*--insnp);
12040 /* If so, move it to the head of the line. */
12041 if (cur_uops == PPRO_UOPS_FEW)
12042 ix86_reorder_insn (insnp, e_ready);
12044 /* Issue the head of the queue. */
12045 issued_this_cycle = 1;
12046 decode[0] = *e_ready--;
12049 /* Look for simple insns to fill in the other two slots. */
12050 for (i = 1; i < 3; ++i)
12051 if (decode[i] == NULL)
12053 if (ready > e_ready)
12057 cur_uops = ix86_safe_ppro_uops (*insnp);
12058 while (cur_uops != PPRO_UOPS_ONE)
12060 if (insnp == ready)
12062 cur_uops = ix86_safe_ppro_uops (*--insnp);
12065 /* Found one. Move it to the head of the queue and issue it. */
12066 if (cur_uops == PPRO_UOPS_ONE)
12068 ix86_reorder_insn (insnp, e_ready);
12069 decode[i] = *e_ready--;
12070 issued_this_cycle++;
12074 /* ??? Didn't find one. Ideally, here we would do a lazy split
12075 of 2-uop insns, issue one and queue the other. */
12079 if (issued_this_cycle == 0)
12080 issued_this_cycle = 1;
12081 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12084 /* We are about to being issuing insns for this clock cycle.
12085 Override the default sort algorithm to better slot instructions. */
12087 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12088 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12089 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12091 int n_ready = *n_readyp;
12092 rtx *e_ready = ready + n_ready - 1;
12094 /* Make sure to go ahead and initialize key items in
12095 ix86_sched_data if we are not going to bother trying to
12096 reorder the ready queue. */
12099 ix86_sched_data.ppro.issued_this_cycle = 1;
12108 case PROCESSOR_PENTIUMPRO:
12109 ix86_sched_reorder_ppro (ready, e_ready);
12114 return ix86_issue_rate ();
12117 /* We are about to issue INSN. Return the number of insns left on the
12118 ready queue that can be issued this cycle. */
12121 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12122 int can_issue_more)
12128 return can_issue_more - 1;
12130 case PROCESSOR_PENTIUMPRO:
12132 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12134 if (uops == PPRO_UOPS_MANY)
12137 ix86_dump_ppro_packet (dump);
12138 ix86_sched_data.ppro.decode[0] = insn;
12139 ix86_sched_data.ppro.decode[1] = NULL;
12140 ix86_sched_data.ppro.decode[2] = NULL;
12142 ix86_dump_ppro_packet (dump);
12143 ix86_sched_data.ppro.decode[0] = NULL;
12145 else if (uops == PPRO_UOPS_FEW)
12148 ix86_dump_ppro_packet (dump);
12149 ix86_sched_data.ppro.decode[0] = insn;
12150 ix86_sched_data.ppro.decode[1] = NULL;
12151 ix86_sched_data.ppro.decode[2] = NULL;
12155 for (i = 0; i < 3; ++i)
12156 if (ix86_sched_data.ppro.decode[i] == NULL)
12158 ix86_sched_data.ppro.decode[i] = insn;
12166 ix86_dump_ppro_packet (dump);
12167 ix86_sched_data.ppro.decode[0] = NULL;
12168 ix86_sched_data.ppro.decode[1] = NULL;
12169 ix86_sched_data.ppro.decode[2] = NULL;
12173 return --ix86_sched_data.ppro.issued_this_cycle;
12178 ia32_use_dfa_pipeline_interface (void)
12180 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12185 /* How many alternative schedules to try. This should be as wide as the
12186 scheduling freedom in the DFA, but no wider. Making this value too
12187 large results extra work for the scheduler. */
12190 ia32_multipass_dfa_lookahead (void)
12192 if (ix86_tune == PROCESSOR_PENTIUM)
12199 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12200 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12204 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
12209 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12211 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12215 /* Subroutine of above to actually do the updating by recursively walking
12219 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12222 enum rtx_code code = GET_CODE (x);
12223 const char *format_ptr = GET_RTX_FORMAT (code);
12226 if (code == MEM && XEXP (x, 0) == dstreg)
12227 MEM_COPY_ATTRIBUTES (x, dstref);
12228 else if (code == MEM && XEXP (x, 0) == srcreg)
12229 MEM_COPY_ATTRIBUTES (x, srcref);
12231 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12233 if (*format_ptr == 'e')
12234 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12236 else if (*format_ptr == 'E')
12237 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12238 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12243 /* Compute the alignment given to a constant that is being placed in memory.
12244 EXP is the constant and ALIGN is the alignment that the object would
12246 The value of this function is used instead of that alignment to align
12250 ix86_constant_alignment (tree exp, int align)
12252 if (TREE_CODE (exp) == REAL_CST)
12254 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12256 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12259 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12266 /* Compute the alignment for a static variable.
12267 TYPE is the data type, and ALIGN is the alignment that
12268 the object would ordinarily have. The value of this function is used
12269 instead of that alignment to align the object. */
12272 ix86_data_alignment (tree type, int align)
12274 if (AGGREGATE_TYPE_P (type)
12275 && TYPE_SIZE (type)
12276 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12277 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12278 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12281 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12282 to 16byte boundary. */
12285 if (AGGREGATE_TYPE_P (type)
12286 && TYPE_SIZE (type)
12287 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12288 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12289 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12293 if (TREE_CODE (type) == ARRAY_TYPE)
12295 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12297 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12300 else if (TREE_CODE (type) == COMPLEX_TYPE)
12303 if (TYPE_MODE (type) == DCmode && align < 64)
12305 if (TYPE_MODE (type) == XCmode && align < 128)
12308 else if ((TREE_CODE (type) == RECORD_TYPE
12309 || TREE_CODE (type) == UNION_TYPE
12310 || TREE_CODE (type) == QUAL_UNION_TYPE)
12311 && TYPE_FIELDS (type))
12313 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12315 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12318 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12319 || TREE_CODE (type) == INTEGER_TYPE)
12321 if (TYPE_MODE (type) == DFmode && align < 64)
12323 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12330 /* Compute the alignment for a local variable.
12331 TYPE is the data type, and ALIGN is the alignment that
12332 the object would ordinarily have. The value of this macro is used
12333 instead of that alignment to align the object. */
12336 ix86_local_alignment (tree type, int align)
12338 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12339 to 16byte boundary. */
12342 if (AGGREGATE_TYPE_P (type)
12343 && TYPE_SIZE (type)
12344 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12345 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12346 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12349 if (TREE_CODE (type) == ARRAY_TYPE)
12351 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12353 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12356 else if (TREE_CODE (type) == COMPLEX_TYPE)
12358 if (TYPE_MODE (type) == DCmode && align < 64)
12360 if (TYPE_MODE (type) == XCmode && align < 128)
12363 else if ((TREE_CODE (type) == RECORD_TYPE
12364 || TREE_CODE (type) == UNION_TYPE
12365 || TREE_CODE (type) == QUAL_UNION_TYPE)
12366 && TYPE_FIELDS (type))
12368 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12370 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12373 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12374 || TREE_CODE (type) == INTEGER_TYPE)
12377 if (TYPE_MODE (type) == DFmode && align < 64)
12379 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12385 /* Emit RTL insns to initialize the variable parts of a trampoline.
12386 FNADDR is an RTX for the address of the function's pure code.
12387 CXT is an RTX for the static chain value for the function. */
12389 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12393 /* Compute offset from the end of the jmp to the target function. */
12394 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12395 plus_constant (tramp, 10),
12396 NULL_RTX, 1, OPTAB_DIRECT);
12397 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12398 gen_int_mode (0xb9, QImode));
12399 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12400 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12401 gen_int_mode (0xe9, QImode));
12402 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12407 /* Try to load address using shorter movl instead of movabs.
12408 We may want to support movq for kernel mode, but kernel does not use
12409 trampolines at the moment. */
12410 if (x86_64_zero_extended_value (fnaddr))
12412 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12413 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12414 gen_int_mode (0xbb41, HImode));
12415 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12416 gen_lowpart (SImode, fnaddr));
12421 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12422 gen_int_mode (0xbb49, HImode));
12423 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12427 /* Load static chain using movabs to r10. */
12428 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12429 gen_int_mode (0xba49, HImode));
12430 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12433 /* Jump to the r11 */
12434 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12435 gen_int_mode (0xff49, HImode));
12436 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12437 gen_int_mode (0xe3, QImode));
12439 if (offset > TRAMPOLINE_SIZE)
12443 #ifdef TRANSFER_FROM_TRAMPOLINE
12444 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12445 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12449 #define def_builtin(MASK, NAME, TYPE, CODE) \
12451 if ((MASK) & target_flags \
12452 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12453 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12454 NULL, NULL_TREE); \
12457 struct builtin_description
12459 const unsigned int mask;
12460 const enum insn_code icode;
12461 const char *const name;
12462 const enum ix86_builtins code;
12463 const enum rtx_code comparison;
12464 const unsigned int flag;
12467 static const struct builtin_description bdesc_comi[] =
12469 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12470 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12471 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12472 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12473 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12474 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12475 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12476 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12477 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12478 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12479 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12480 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12481 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12482 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12483 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12484 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12485 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12486 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12487 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12488 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12489 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12490 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12491 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12492 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12495 static const struct builtin_description bdesc_2arg[] =
12498 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12499 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12500 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12501 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12502 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12503 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12504 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12505 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12507 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12508 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12509 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12510 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12511 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12512 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12513 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12514 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12515 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12516 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12517 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12518 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12519 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12520 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12521 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12522 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12523 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12524 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12525 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12526 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12528 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12529 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12530 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12531 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12533 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12534 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12535 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12536 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12538 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12539 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12540 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12541 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12542 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12545 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12546 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12547 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12548 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12549 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12550 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12551 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12552 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12554 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12555 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12556 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12557 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12558 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12559 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12560 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12561 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12563 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12564 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12565 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12567 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12568 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12569 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12570 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12572 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12573 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12575 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12576 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12577 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12578 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12579 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12580 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12582 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12583 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12584 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12585 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12587 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12588 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12589 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12590 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12591 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12592 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12595 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12596 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12597 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12599 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12600 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12601 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12603 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12604 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12605 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12606 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12607 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12608 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12610 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12611 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12612 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12613 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12614 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12615 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12617 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12618 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12619 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12620 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12622 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12623 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12626 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12627 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12628 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12629 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12630 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12631 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12632 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12633 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12635 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12636 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12637 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12638 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12639 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12640 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12641 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12642 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12643 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12644 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12645 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12646 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12647 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12648 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12649 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12650 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12651 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12652 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12653 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12654 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12656 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12657 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12658 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12659 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12661 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12662 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12663 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12664 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12667 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12668 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12671 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12672 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12673 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12674 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12675 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12676 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12677 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12678 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12680 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12681 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12682 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12683 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12684 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12685 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12686 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12687 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12689 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12690 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12691 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12692 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12694 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12695 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12696 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12697 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12700 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12702 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12703 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12704 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12705 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12706 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12707 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12709 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12710 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12711 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12712 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12714 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12715 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12716 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12717 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12718 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12719 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12720 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12721 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12723 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12724 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12725 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12732 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12734 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12737 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12739 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12741 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12742 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12744 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12746 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12747 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12749 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12751 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12752 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12753 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12754 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12757 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12758 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12759 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12760 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12761 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12762 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12765 static const struct builtin_description bdesc_1arg[] =
12767 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12768 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12770 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12771 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12772 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12774 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12775 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12776 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12777 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12778 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12779 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12782 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12783 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12784 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12786 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12788 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12791 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12792 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12793 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12794 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12795 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12797 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12799 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12800 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12801 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12802 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12804 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12805 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12811 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12812 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12813 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12817 ix86_init_builtins (void)
12820 ix86_init_mmx_sse_builtins ();
12823 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12824 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12827 ix86_init_mmx_sse_builtins (void)
12829 const struct builtin_description * d;
12832 tree pchar_type_node = build_pointer_type (char_type_node);
12833 tree pcchar_type_node = build_pointer_type (
12834 build_type_variant (char_type_node, 1, 0));
12835 tree pfloat_type_node = build_pointer_type (float_type_node);
12836 tree pcfloat_type_node = build_pointer_type (
12837 build_type_variant (float_type_node, 1, 0));
12838 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12839 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12840 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12843 tree int_ftype_v4sf_v4sf
12844 = build_function_type_list (integer_type_node,
12845 V4SF_type_node, V4SF_type_node, NULL_TREE);
12846 tree v4si_ftype_v4sf_v4sf
12847 = build_function_type_list (V4SI_type_node,
12848 V4SF_type_node, V4SF_type_node, NULL_TREE);
12849 /* MMX/SSE/integer conversions. */
12850 tree int_ftype_v4sf
12851 = build_function_type_list (integer_type_node,
12852 V4SF_type_node, NULL_TREE);
12853 tree int64_ftype_v4sf
12854 = build_function_type_list (long_long_integer_type_node,
12855 V4SF_type_node, NULL_TREE);
12856 tree int_ftype_v8qi
12857 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12858 tree v4sf_ftype_v4sf_int
12859 = build_function_type_list (V4SF_type_node,
12860 V4SF_type_node, integer_type_node, NULL_TREE);
12861 tree v4sf_ftype_v4sf_int64
12862 = build_function_type_list (V4SF_type_node,
12863 V4SF_type_node, long_long_integer_type_node,
12865 tree v4sf_ftype_v4sf_v2si
12866 = build_function_type_list (V4SF_type_node,
12867 V4SF_type_node, V2SI_type_node, NULL_TREE);
12868 tree int_ftype_v4hi_int
12869 = build_function_type_list (integer_type_node,
12870 V4HI_type_node, integer_type_node, NULL_TREE);
12871 tree v4hi_ftype_v4hi_int_int
12872 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12873 integer_type_node, integer_type_node,
12875 /* Miscellaneous. */
12876 tree v8qi_ftype_v4hi_v4hi
12877 = build_function_type_list (V8QI_type_node,
12878 V4HI_type_node, V4HI_type_node, NULL_TREE);
12879 tree v4hi_ftype_v2si_v2si
12880 = build_function_type_list (V4HI_type_node,
12881 V2SI_type_node, V2SI_type_node, NULL_TREE);
12882 tree v4sf_ftype_v4sf_v4sf_int
12883 = build_function_type_list (V4SF_type_node,
12884 V4SF_type_node, V4SF_type_node,
12885 integer_type_node, NULL_TREE);
12886 tree v2si_ftype_v4hi_v4hi
12887 = build_function_type_list (V2SI_type_node,
12888 V4HI_type_node, V4HI_type_node, NULL_TREE);
12889 tree v4hi_ftype_v4hi_int
12890 = build_function_type_list (V4HI_type_node,
12891 V4HI_type_node, integer_type_node, NULL_TREE);
12892 tree v4hi_ftype_v4hi_di
12893 = build_function_type_list (V4HI_type_node,
12894 V4HI_type_node, long_long_unsigned_type_node,
12896 tree v2si_ftype_v2si_di
12897 = build_function_type_list (V2SI_type_node,
12898 V2SI_type_node, long_long_unsigned_type_node,
12900 tree void_ftype_void
12901 = build_function_type (void_type_node, void_list_node);
12902 tree void_ftype_unsigned
12903 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12904 tree void_ftype_unsigned_unsigned
12905 = build_function_type_list (void_type_node, unsigned_type_node,
12906 unsigned_type_node, NULL_TREE);
12907 tree void_ftype_pcvoid_unsigned_unsigned
12908 = build_function_type_list (void_type_node, const_ptr_type_node,
12909 unsigned_type_node, unsigned_type_node,
12911 tree unsigned_ftype_void
12912 = build_function_type (unsigned_type_node, void_list_node);
12914 = build_function_type (long_long_unsigned_type_node, void_list_node);
12915 tree v4sf_ftype_void
12916 = build_function_type (V4SF_type_node, void_list_node);
12917 tree v2si_ftype_v4sf
12918 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12919 /* Loads/stores. */
12920 tree void_ftype_v8qi_v8qi_pchar
12921 = build_function_type_list (void_type_node,
12922 V8QI_type_node, V8QI_type_node,
12923 pchar_type_node, NULL_TREE);
12924 tree v4sf_ftype_pcfloat
12925 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12926 /* @@@ the type is bogus */
12927 tree v4sf_ftype_v4sf_pv2si
12928 = build_function_type_list (V4SF_type_node,
12929 V4SF_type_node, pv2si_type_node, NULL_TREE);
12930 tree void_ftype_pv2si_v4sf
12931 = build_function_type_list (void_type_node,
12932 pv2si_type_node, V4SF_type_node, NULL_TREE);
12933 tree void_ftype_pfloat_v4sf
12934 = build_function_type_list (void_type_node,
12935 pfloat_type_node, V4SF_type_node, NULL_TREE);
12936 tree void_ftype_pdi_di
12937 = build_function_type_list (void_type_node,
12938 pdi_type_node, long_long_unsigned_type_node,
12940 tree void_ftype_pv2di_v2di
12941 = build_function_type_list (void_type_node,
12942 pv2di_type_node, V2DI_type_node, NULL_TREE);
12943 /* Normal vector unops. */
12944 tree v4sf_ftype_v4sf
12945 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12947 /* Normal vector binops. */
12948 tree v4sf_ftype_v4sf_v4sf
12949 = build_function_type_list (V4SF_type_node,
12950 V4SF_type_node, V4SF_type_node, NULL_TREE);
12951 tree v8qi_ftype_v8qi_v8qi
12952 = build_function_type_list (V8QI_type_node,
12953 V8QI_type_node, V8QI_type_node, NULL_TREE);
12954 tree v4hi_ftype_v4hi_v4hi
12955 = build_function_type_list (V4HI_type_node,
12956 V4HI_type_node, V4HI_type_node, NULL_TREE);
12957 tree v2si_ftype_v2si_v2si
12958 = build_function_type_list (V2SI_type_node,
12959 V2SI_type_node, V2SI_type_node, NULL_TREE);
12960 tree di_ftype_di_di
12961 = build_function_type_list (long_long_unsigned_type_node,
12962 long_long_unsigned_type_node,
12963 long_long_unsigned_type_node, NULL_TREE);
12965 tree v2si_ftype_v2sf
12966 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12967 tree v2sf_ftype_v2si
12968 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12969 tree v2si_ftype_v2si
12970 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12971 tree v2sf_ftype_v2sf
12972 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12973 tree v2sf_ftype_v2sf_v2sf
12974 = build_function_type_list (V2SF_type_node,
12975 V2SF_type_node, V2SF_type_node, NULL_TREE);
12976 tree v2si_ftype_v2sf_v2sf
12977 = build_function_type_list (V2SI_type_node,
12978 V2SF_type_node, V2SF_type_node, NULL_TREE);
12979 tree pint_type_node = build_pointer_type (integer_type_node);
12980 tree pcint_type_node = build_pointer_type (
12981 build_type_variant (integer_type_node, 1, 0));
12982 tree pdouble_type_node = build_pointer_type (double_type_node);
12983 tree pcdouble_type_node = build_pointer_type (
12984 build_type_variant (double_type_node, 1, 0));
12985 tree int_ftype_v2df_v2df
12986 = build_function_type_list (integer_type_node,
12987 V2DF_type_node, V2DF_type_node, NULL_TREE);
12990 = build_function_type (intTI_type_node, void_list_node);
12991 tree v2di_ftype_void
12992 = build_function_type (V2DI_type_node, void_list_node);
12993 tree ti_ftype_ti_ti
12994 = build_function_type_list (intTI_type_node,
12995 intTI_type_node, intTI_type_node, NULL_TREE);
12996 tree void_ftype_pcvoid
12997 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12999 = build_function_type_list (V2DI_type_node,
13000 long_long_unsigned_type_node, NULL_TREE);
13002 = build_function_type_list (long_long_unsigned_type_node,
13003 V2DI_type_node, NULL_TREE);
13004 tree v4sf_ftype_v4si
13005 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13006 tree v4si_ftype_v4sf
13007 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13008 tree v2df_ftype_v4si
13009 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13010 tree v4si_ftype_v2df
13011 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13012 tree v2si_ftype_v2df
13013 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13014 tree v4sf_ftype_v2df
13015 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13016 tree v2df_ftype_v2si
13017 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13018 tree v2df_ftype_v4sf
13019 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13020 tree int_ftype_v2df
13021 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13022 tree int64_ftype_v2df
13023 = build_function_type_list (long_long_integer_type_node,
13024 V2DF_type_node, NULL_TREE);
13025 tree v2df_ftype_v2df_int
13026 = build_function_type_list (V2DF_type_node,
13027 V2DF_type_node, integer_type_node, NULL_TREE);
13028 tree v2df_ftype_v2df_int64
13029 = build_function_type_list (V2DF_type_node,
13030 V2DF_type_node, long_long_integer_type_node,
13032 tree v4sf_ftype_v4sf_v2df
13033 = build_function_type_list (V4SF_type_node,
13034 V4SF_type_node, V2DF_type_node, NULL_TREE);
13035 tree v2df_ftype_v2df_v4sf
13036 = build_function_type_list (V2DF_type_node,
13037 V2DF_type_node, V4SF_type_node, NULL_TREE);
13038 tree v2df_ftype_v2df_v2df_int
13039 = build_function_type_list (V2DF_type_node,
13040 V2DF_type_node, V2DF_type_node,
13043 tree v2df_ftype_v2df_pv2si
13044 = build_function_type_list (V2DF_type_node,
13045 V2DF_type_node, pv2si_type_node, NULL_TREE);
13046 tree void_ftype_pv2si_v2df
13047 = build_function_type_list (void_type_node,
13048 pv2si_type_node, V2DF_type_node, NULL_TREE);
13049 tree void_ftype_pdouble_v2df
13050 = build_function_type_list (void_type_node,
13051 pdouble_type_node, V2DF_type_node, NULL_TREE);
13052 tree void_ftype_pint_int
13053 = build_function_type_list (void_type_node,
13054 pint_type_node, integer_type_node, NULL_TREE);
13055 tree void_ftype_v16qi_v16qi_pchar
13056 = build_function_type_list (void_type_node,
13057 V16QI_type_node, V16QI_type_node,
13058 pchar_type_node, NULL_TREE);
13059 tree v2df_ftype_pcdouble
13060 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13061 tree v2df_ftype_v2df_v2df
13062 = build_function_type_list (V2DF_type_node,
13063 V2DF_type_node, V2DF_type_node, NULL_TREE);
13064 tree v16qi_ftype_v16qi_v16qi
13065 = build_function_type_list (V16QI_type_node,
13066 V16QI_type_node, V16QI_type_node, NULL_TREE);
13067 tree v8hi_ftype_v8hi_v8hi
13068 = build_function_type_list (V8HI_type_node,
13069 V8HI_type_node, V8HI_type_node, NULL_TREE);
13070 tree v4si_ftype_v4si_v4si
13071 = build_function_type_list (V4SI_type_node,
13072 V4SI_type_node, V4SI_type_node, NULL_TREE);
13073 tree v2di_ftype_v2di_v2di
13074 = build_function_type_list (V2DI_type_node,
13075 V2DI_type_node, V2DI_type_node, NULL_TREE);
13076 tree v2di_ftype_v2df_v2df
13077 = build_function_type_list (V2DI_type_node,
13078 V2DF_type_node, V2DF_type_node, NULL_TREE);
13079 tree v2df_ftype_v2df
13080 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13081 tree v2df_ftype_double
13082 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13083 tree v2df_ftype_double_double
13084 = build_function_type_list (V2DF_type_node,
13085 double_type_node, double_type_node, NULL_TREE);
13086 tree int_ftype_v8hi_int
13087 = build_function_type_list (integer_type_node,
13088 V8HI_type_node, integer_type_node, NULL_TREE);
13089 tree v8hi_ftype_v8hi_int_int
13090 = build_function_type_list (V8HI_type_node,
13091 V8HI_type_node, integer_type_node,
13092 integer_type_node, NULL_TREE);
13093 tree v2di_ftype_v2di_int
13094 = build_function_type_list (V2DI_type_node,
13095 V2DI_type_node, integer_type_node, NULL_TREE);
13096 tree v4si_ftype_v4si_int
13097 = build_function_type_list (V4SI_type_node,
13098 V4SI_type_node, integer_type_node, NULL_TREE);
13099 tree v8hi_ftype_v8hi_int
13100 = build_function_type_list (V8HI_type_node,
13101 V8HI_type_node, integer_type_node, NULL_TREE);
13102 tree v8hi_ftype_v8hi_v2di
13103 = build_function_type_list (V8HI_type_node,
13104 V8HI_type_node, V2DI_type_node, NULL_TREE);
13105 tree v4si_ftype_v4si_v2di
13106 = build_function_type_list (V4SI_type_node,
13107 V4SI_type_node, V2DI_type_node, NULL_TREE);
13108 tree v4si_ftype_v8hi_v8hi
13109 = build_function_type_list (V4SI_type_node,
13110 V8HI_type_node, V8HI_type_node, NULL_TREE);
13111 tree di_ftype_v8qi_v8qi
13112 = build_function_type_list (long_long_unsigned_type_node,
13113 V8QI_type_node, V8QI_type_node, NULL_TREE);
13114 tree v2di_ftype_v16qi_v16qi
13115 = build_function_type_list (V2DI_type_node,
13116 V16QI_type_node, V16QI_type_node, NULL_TREE);
13117 tree int_ftype_v16qi
13118 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13119 tree v16qi_ftype_pcchar
13120 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13121 tree void_ftype_pchar_v16qi
13122 = build_function_type_list (void_type_node,
13123 pchar_type_node, V16QI_type_node, NULL_TREE);
13124 tree v4si_ftype_pcint
13125 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13126 tree void_ftype_pcint_v4si
13127 = build_function_type_list (void_type_node,
13128 pcint_type_node, V4SI_type_node, NULL_TREE);
13129 tree v2di_ftype_v2di
13130 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13132 /* Add all builtins that are more or less simple operations on two
13134 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13136 /* Use one of the operands; the target can have a different mode for
13137 mask-generating compares. */
13138 enum machine_mode mode;
13143 mode = insn_data[d->icode].operand[1].mode;
13148 type = v16qi_ftype_v16qi_v16qi;
13151 type = v8hi_ftype_v8hi_v8hi;
13154 type = v4si_ftype_v4si_v4si;
13157 type = v2di_ftype_v2di_v2di;
13160 type = v2df_ftype_v2df_v2df;
13163 type = ti_ftype_ti_ti;
13166 type = v4sf_ftype_v4sf_v4sf;
13169 type = v8qi_ftype_v8qi_v8qi;
13172 type = v4hi_ftype_v4hi_v4hi;
13175 type = v2si_ftype_v2si_v2si;
13178 type = di_ftype_di_di;
13185 /* Override for comparisons. */
13186 if (d->icode == CODE_FOR_maskcmpv4sf3
13187 || d->icode == CODE_FOR_maskncmpv4sf3
13188 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13189 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13190 type = v4si_ftype_v4sf_v4sf;
13192 if (d->icode == CODE_FOR_maskcmpv2df3
13193 || d->icode == CODE_FOR_maskncmpv2df3
13194 || d->icode == CODE_FOR_vmmaskcmpv2df3
13195 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13196 type = v2di_ftype_v2df_v2df;
13198 def_builtin (d->mask, d->name, type, d->code);
13201 /* Add the remaining MMX insns with somewhat more complicated types. */
13202 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13203 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13204 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13205 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13206 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13208 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13209 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13210 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13212 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13213 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13215 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13216 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13218 /* comi/ucomi insns. */
13219 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13220 if (d->mask == MASK_SSE2)
13221 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13223 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13225 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13226 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13227 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13229 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13230 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13231 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13232 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13233 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13234 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13235 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13236 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13237 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13238 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13239 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13241 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13242 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13244 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13246 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13247 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13248 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13249 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13250 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13251 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13253 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13254 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13255 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13256 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13258 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13259 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13260 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13261 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13263 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13265 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13267 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13268 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13269 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13270 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13271 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13272 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13274 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13276 /* Original 3DNow! */
13277 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13278 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13279 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13287 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13288 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13289 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13290 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13291 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13292 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13293 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13294 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13295 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13296 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13298 /* 3DNow! extension as used in the Athlon CPU. */
13299 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13300 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13301 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13302 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13303 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13304 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13306 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13309 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13310 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13312 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13313 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13314 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13316 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13317 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13318 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13319 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13320 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13321 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13323 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13324 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13325 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13326 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13328 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13329 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13330 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13331 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13332 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13334 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13335 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13336 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13337 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13339 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13340 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13342 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13344 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13345 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13347 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13348 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13349 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13350 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13351 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13353 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13355 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13356 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13357 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13358 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13360 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13361 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13362 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13364 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13365 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13366 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13367 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13369 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13370 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13371 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13372 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13373 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13374 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13375 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13377 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13378 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13379 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13381 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13382 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13383 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13384 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13385 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13386 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13387 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13389 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13391 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13392 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13393 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13395 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13396 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13397 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13399 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13400 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13402 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13403 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13404 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13405 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13407 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13408 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13409 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13410 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13412 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13413 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13415 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13417 /* Prescott New Instructions. */
13418 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13419 void_ftype_pcvoid_unsigned_unsigned,
13420 IX86_BUILTIN_MONITOR);
13421 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13422 void_ftype_unsigned_unsigned,
13423 IX86_BUILTIN_MWAIT);
13424 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13426 IX86_BUILTIN_MOVSHDUP);
13427 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13429 IX86_BUILTIN_MOVSLDUP);
13430 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13431 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13432 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13433 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13434 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13435 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13438 /* Errors in the source file can cause expand_expr to return const0_rtx
13439 where we expect a vector. To avoid crashing, use one of the vector
13440 clear instructions. */
13442 safe_vector_operand (rtx x, enum machine_mode mode)
13444 if (x != const0_rtx)
13446 x = gen_reg_rtx (mode);
13448 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13449 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13450 : gen_rtx_SUBREG (DImode, x, 0)));
13452 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13453 : gen_rtx_SUBREG (V4SFmode, x, 0),
13454 CONST0_RTX (V4SFmode)));
13458 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13461 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13464 tree arg0 = TREE_VALUE (arglist);
13465 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13466 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13467 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13468 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13469 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13470 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13472 if (VECTOR_MODE_P (mode0))
13473 op0 = safe_vector_operand (op0, mode0);
13474 if (VECTOR_MODE_P (mode1))
13475 op1 = safe_vector_operand (op1, mode1);
13478 || GET_MODE (target) != tmode
13479 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13480 target = gen_reg_rtx (tmode);
13482 if (GET_MODE (op1) == SImode && mode1 == TImode)
13484 rtx x = gen_reg_rtx (V4SImode);
13485 emit_insn (gen_sse2_loadd (x, op1));
13486 op1 = gen_lowpart (TImode, x);
13489 /* In case the insn wants input operands in modes different from
13490 the result, abort. */
13491 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13494 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13495 op0 = copy_to_mode_reg (mode0, op0);
13496 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13497 op1 = copy_to_mode_reg (mode1, op1);
13499 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13500 yet one of the two must not be a memory. This is normally enforced
13501 by expanders, but we didn't bother to create one here. */
13502 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13503 op0 = copy_to_mode_reg (mode0, op0);
13505 pat = GEN_FCN (icode) (target, op0, op1);
13512 /* Subroutine of ix86_expand_builtin to take care of stores. */
13515 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13518 tree arg0 = TREE_VALUE (arglist);
13519 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13520 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13521 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13522 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13523 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13525 if (VECTOR_MODE_P (mode1))
13526 op1 = safe_vector_operand (op1, mode1);
13528 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13529 op1 = copy_to_mode_reg (mode1, op1);
13531 pat = GEN_FCN (icode) (op0, op1);
13537 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13540 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13541 rtx target, int do_load)
13544 tree arg0 = TREE_VALUE (arglist);
13545 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13546 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13547 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13550 || GET_MODE (target) != tmode
13551 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13552 target = gen_reg_rtx (tmode);
13554 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13557 if (VECTOR_MODE_P (mode0))
13558 op0 = safe_vector_operand (op0, mode0);
13560 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13561 op0 = copy_to_mode_reg (mode0, op0);
13564 pat = GEN_FCN (icode) (target, op0);
13571 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13572 sqrtss, rsqrtss, rcpss. */
13575 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13578 tree arg0 = TREE_VALUE (arglist);
13579 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13580 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13581 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13584 || GET_MODE (target) != tmode
13585 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13586 target = gen_reg_rtx (tmode);
13588 if (VECTOR_MODE_P (mode0))
13589 op0 = safe_vector_operand (op0, mode0);
13591 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13592 op0 = copy_to_mode_reg (mode0, op0);
13595 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13596 op1 = copy_to_mode_reg (mode0, op1);
13598 pat = GEN_FCN (icode) (target, op0, op1);
13605 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13608 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13612 tree arg0 = TREE_VALUE (arglist);
13613 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13614 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13615 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13617 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13618 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13619 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13620 enum rtx_code comparison = d->comparison;
13622 if (VECTOR_MODE_P (mode0))
13623 op0 = safe_vector_operand (op0, mode0);
13624 if (VECTOR_MODE_P (mode1))
13625 op1 = safe_vector_operand (op1, mode1);
13627 /* Swap operands if we have a comparison that isn't available in
13631 rtx tmp = gen_reg_rtx (mode1);
13632 emit_move_insn (tmp, op1);
13638 || GET_MODE (target) != tmode
13639 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13640 target = gen_reg_rtx (tmode);
13642 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13643 op0 = copy_to_mode_reg (mode0, op0);
13644 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13645 op1 = copy_to_mode_reg (mode1, op1);
13647 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13648 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13655 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13658 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13662 tree arg0 = TREE_VALUE (arglist);
13663 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13664 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13665 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13667 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13668 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13669 enum rtx_code comparison = d->comparison;
13671 if (VECTOR_MODE_P (mode0))
13672 op0 = safe_vector_operand (op0, mode0);
13673 if (VECTOR_MODE_P (mode1))
13674 op1 = safe_vector_operand (op1, mode1);
13676 /* Swap operands if we have a comparison that isn't available in
13685 target = gen_reg_rtx (SImode);
13686 emit_move_insn (target, const0_rtx);
13687 target = gen_rtx_SUBREG (QImode, target, 0);
13689 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13690 op0 = copy_to_mode_reg (mode0, op0);
13691 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13692 op1 = copy_to_mode_reg (mode1, op1);
13694 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13695 pat = GEN_FCN (d->icode) (op0, op1);
13699 emit_insn (gen_rtx_SET (VOIDmode,
13700 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13701 gen_rtx_fmt_ee (comparison, QImode,
13705 return SUBREG_REG (target);
13708 /* Expand an expression EXP that calls a built-in function,
13709 with result going to TARGET if that's convenient
13710 (and in mode MODE if that's convenient).
13711 SUBTARGET may be used as the target for computing one of EXP's operands.
13712 IGNORE is nonzero if the value is to be ignored. */
13715 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13716 enum machine_mode mode ATTRIBUTE_UNUSED,
13717 int ignore ATTRIBUTE_UNUSED)
13719 const struct builtin_description *d;
13721 enum insn_code icode;
13722 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13723 tree arglist = TREE_OPERAND (exp, 1);
13724 tree arg0, arg1, arg2;
13725 rtx op0, op1, op2, pat;
13726 enum machine_mode tmode, mode0, mode1, mode2;
13727 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13731 case IX86_BUILTIN_EMMS:
13732 emit_insn (gen_emms ());
13735 case IX86_BUILTIN_SFENCE:
13736 emit_insn (gen_sfence ());
13739 case IX86_BUILTIN_PEXTRW:
13740 case IX86_BUILTIN_PEXTRW128:
13741 icode = (fcode == IX86_BUILTIN_PEXTRW
13742 ? CODE_FOR_mmx_pextrw
13743 : CODE_FOR_sse2_pextrw);
13744 arg0 = TREE_VALUE (arglist);
13745 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13746 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13747 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13748 tmode = insn_data[icode].operand[0].mode;
13749 mode0 = insn_data[icode].operand[1].mode;
13750 mode1 = insn_data[icode].operand[2].mode;
13752 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13753 op0 = copy_to_mode_reg (mode0, op0);
13754 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13756 /* @@@ better error message */
13757 error ("selector must be an immediate");
13758 return gen_reg_rtx (tmode);
13761 || GET_MODE (target) != tmode
13762 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13763 target = gen_reg_rtx (tmode);
13764 pat = GEN_FCN (icode) (target, op0, op1);
13770 case IX86_BUILTIN_PINSRW:
13771 case IX86_BUILTIN_PINSRW128:
13772 icode = (fcode == IX86_BUILTIN_PINSRW
13773 ? CODE_FOR_mmx_pinsrw
13774 : CODE_FOR_sse2_pinsrw);
13775 arg0 = TREE_VALUE (arglist);
13776 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13777 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13778 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13779 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13780 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13781 tmode = insn_data[icode].operand[0].mode;
13782 mode0 = insn_data[icode].operand[1].mode;
13783 mode1 = insn_data[icode].operand[2].mode;
13784 mode2 = insn_data[icode].operand[3].mode;
13786 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13787 op0 = copy_to_mode_reg (mode0, op0);
13788 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13789 op1 = copy_to_mode_reg (mode1, op1);
13790 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13792 /* @@@ better error message */
13793 error ("selector must be an immediate");
13797 || GET_MODE (target) != tmode
13798 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13799 target = gen_reg_rtx (tmode);
13800 pat = GEN_FCN (icode) (target, op0, op1, op2);
13806 case IX86_BUILTIN_MASKMOVQ:
13807 case IX86_BUILTIN_MASKMOVDQU:
13808 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13809 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13810 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13811 : CODE_FOR_sse2_maskmovdqu));
13812 /* Note the arg order is different from the operand order. */
13813 arg1 = TREE_VALUE (arglist);
13814 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13815 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13816 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13817 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13818 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13819 mode0 = insn_data[icode].operand[0].mode;
13820 mode1 = insn_data[icode].operand[1].mode;
13821 mode2 = insn_data[icode].operand[2].mode;
13823 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13824 op0 = copy_to_mode_reg (mode0, op0);
13825 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13826 op1 = copy_to_mode_reg (mode1, op1);
13827 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13828 op2 = copy_to_mode_reg (mode2, op2);
13829 pat = GEN_FCN (icode) (op0, op1, op2);
13835 case IX86_BUILTIN_SQRTSS:
13836 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13837 case IX86_BUILTIN_RSQRTSS:
13838 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13839 case IX86_BUILTIN_RCPSS:
13840 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13842 case IX86_BUILTIN_LOADAPS:
13843 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13845 case IX86_BUILTIN_LOADUPS:
13846 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13848 case IX86_BUILTIN_STOREAPS:
13849 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13851 case IX86_BUILTIN_STOREUPS:
13852 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13854 case IX86_BUILTIN_LOADSS:
13855 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13857 case IX86_BUILTIN_STORESS:
13858 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13860 case IX86_BUILTIN_LOADHPS:
13861 case IX86_BUILTIN_LOADLPS:
13862 case IX86_BUILTIN_LOADHPD:
13863 case IX86_BUILTIN_LOADLPD:
13864 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13865 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13866 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13867 : CODE_FOR_sse2_movlpd);
13868 arg0 = TREE_VALUE (arglist);
13869 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13870 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13871 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13872 tmode = insn_data[icode].operand[0].mode;
13873 mode0 = insn_data[icode].operand[1].mode;
13874 mode1 = insn_data[icode].operand[2].mode;
13876 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13877 op0 = copy_to_mode_reg (mode0, op0);
13878 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13880 || GET_MODE (target) != tmode
13881 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13882 target = gen_reg_rtx (tmode);
13883 pat = GEN_FCN (icode) (target, op0, op1);
13889 case IX86_BUILTIN_STOREHPS:
13890 case IX86_BUILTIN_STORELPS:
13891 case IX86_BUILTIN_STOREHPD:
13892 case IX86_BUILTIN_STORELPD:
13893 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13894 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13895 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13896 : CODE_FOR_sse2_movlpd);
13897 arg0 = TREE_VALUE (arglist);
13898 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13899 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13900 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13901 mode0 = insn_data[icode].operand[1].mode;
13902 mode1 = insn_data[icode].operand[2].mode;
13904 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13905 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13906 op1 = copy_to_mode_reg (mode1, op1);
13908 pat = GEN_FCN (icode) (op0, op0, op1);
13914 case IX86_BUILTIN_MOVNTPS:
13915 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13916 case IX86_BUILTIN_MOVNTQ:
13917 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13919 case IX86_BUILTIN_LDMXCSR:
13920 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13921 target = assign_386_stack_local (SImode, 0);
13922 emit_move_insn (target, op0);
13923 emit_insn (gen_ldmxcsr (target));
13926 case IX86_BUILTIN_STMXCSR:
13927 target = assign_386_stack_local (SImode, 0);
13928 emit_insn (gen_stmxcsr (target));
13929 return copy_to_mode_reg (SImode, target);
13931 case IX86_BUILTIN_SHUFPS:
13932 case IX86_BUILTIN_SHUFPD:
13933 icode = (fcode == IX86_BUILTIN_SHUFPS
13934 ? CODE_FOR_sse_shufps
13935 : CODE_FOR_sse2_shufpd);
13936 arg0 = TREE_VALUE (arglist);
13937 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13938 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13939 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13940 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13941 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13942 tmode = insn_data[icode].operand[0].mode;
13943 mode0 = insn_data[icode].operand[1].mode;
13944 mode1 = insn_data[icode].operand[2].mode;
13945 mode2 = insn_data[icode].operand[3].mode;
13947 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13948 op0 = copy_to_mode_reg (mode0, op0);
13949 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13950 op1 = copy_to_mode_reg (mode1, op1);
13951 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13953 /* @@@ better error message */
13954 error ("mask must be an immediate");
13955 return gen_reg_rtx (tmode);
13958 || GET_MODE (target) != tmode
13959 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13960 target = gen_reg_rtx (tmode);
13961 pat = GEN_FCN (icode) (target, op0, op1, op2);
13967 case IX86_BUILTIN_PSHUFW:
13968 case IX86_BUILTIN_PSHUFD:
13969 case IX86_BUILTIN_PSHUFHW:
13970 case IX86_BUILTIN_PSHUFLW:
13971 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13972 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13973 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13974 : CODE_FOR_mmx_pshufw);
13975 arg0 = TREE_VALUE (arglist);
13976 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13977 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13978 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13979 tmode = insn_data[icode].operand[0].mode;
13980 mode1 = insn_data[icode].operand[1].mode;
13981 mode2 = insn_data[icode].operand[2].mode;
13983 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13984 op0 = copy_to_mode_reg (mode1, op0);
13985 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13987 /* @@@ better error message */
13988 error ("mask must be an immediate");
13992 || GET_MODE (target) != tmode
13993 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13994 target = gen_reg_rtx (tmode);
13995 pat = GEN_FCN (icode) (target, op0, op1);
14001 case IX86_BUILTIN_PSLLDQI128:
14002 case IX86_BUILTIN_PSRLDQI128:
14003 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14004 : CODE_FOR_sse2_lshrti3);
14005 arg0 = TREE_VALUE (arglist);
14006 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14007 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14008 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14009 tmode = insn_data[icode].operand[0].mode;
14010 mode1 = insn_data[icode].operand[1].mode;
14011 mode2 = insn_data[icode].operand[2].mode;
14013 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14015 op0 = copy_to_reg (op0);
14016 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14018 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14020 error ("shift must be an immediate");
14023 target = gen_reg_rtx (V2DImode);
14024 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14030 case IX86_BUILTIN_FEMMS:
14031 emit_insn (gen_femms ());
14034 case IX86_BUILTIN_PAVGUSB:
14035 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14037 case IX86_BUILTIN_PF2ID:
14038 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14040 case IX86_BUILTIN_PFACC:
14041 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14043 case IX86_BUILTIN_PFADD:
14044 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14046 case IX86_BUILTIN_PFCMPEQ:
14047 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14049 case IX86_BUILTIN_PFCMPGE:
14050 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14052 case IX86_BUILTIN_PFCMPGT:
14053 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14055 case IX86_BUILTIN_PFMAX:
14056 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14058 case IX86_BUILTIN_PFMIN:
14059 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14061 case IX86_BUILTIN_PFMUL:
14062 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14064 case IX86_BUILTIN_PFRCP:
14065 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14067 case IX86_BUILTIN_PFRCPIT1:
14068 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14070 case IX86_BUILTIN_PFRCPIT2:
14071 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14073 case IX86_BUILTIN_PFRSQIT1:
14074 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14076 case IX86_BUILTIN_PFRSQRT:
14077 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14079 case IX86_BUILTIN_PFSUB:
14080 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14082 case IX86_BUILTIN_PFSUBR:
14083 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14085 case IX86_BUILTIN_PI2FD:
14086 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14088 case IX86_BUILTIN_PMULHRW:
14089 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14091 case IX86_BUILTIN_PF2IW:
14092 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14094 case IX86_BUILTIN_PFNACC:
14095 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14097 case IX86_BUILTIN_PFPNACC:
14098 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14100 case IX86_BUILTIN_PI2FW:
14101 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14103 case IX86_BUILTIN_PSWAPDSI:
14104 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14106 case IX86_BUILTIN_PSWAPDSF:
14107 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14109 case IX86_BUILTIN_SSE_ZERO:
14110 target = gen_reg_rtx (V4SFmode);
14111 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14114 case IX86_BUILTIN_MMX_ZERO:
14115 target = gen_reg_rtx (DImode);
14116 emit_insn (gen_mmx_clrdi (target));
14119 case IX86_BUILTIN_CLRTI:
14120 target = gen_reg_rtx (V2DImode);
14121 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14125 case IX86_BUILTIN_SQRTSD:
14126 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14127 case IX86_BUILTIN_LOADAPD:
14128 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14129 case IX86_BUILTIN_LOADUPD:
14130 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14132 case IX86_BUILTIN_STOREAPD:
14133 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14134 case IX86_BUILTIN_STOREUPD:
14135 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14137 case IX86_BUILTIN_LOADSD:
14138 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14140 case IX86_BUILTIN_STORESD:
14141 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14143 case IX86_BUILTIN_SETPD1:
14144 target = assign_386_stack_local (DFmode, 0);
14145 arg0 = TREE_VALUE (arglist);
14146 emit_move_insn (adjust_address (target, DFmode, 0),
14147 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14148 op0 = gen_reg_rtx (V2DFmode);
14149 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14150 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14153 case IX86_BUILTIN_SETPD:
14154 target = assign_386_stack_local (V2DFmode, 0);
14155 arg0 = TREE_VALUE (arglist);
14156 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14157 emit_move_insn (adjust_address (target, DFmode, 0),
14158 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14159 emit_move_insn (adjust_address (target, DFmode, 8),
14160 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14161 op0 = gen_reg_rtx (V2DFmode);
14162 emit_insn (gen_sse2_movapd (op0, target));
14165 case IX86_BUILTIN_LOADRPD:
14166 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14167 gen_reg_rtx (V2DFmode), 1);
14168 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14171 case IX86_BUILTIN_LOADPD1:
14172 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14173 gen_reg_rtx (V2DFmode), 1);
14174 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14177 case IX86_BUILTIN_STOREPD1:
14178 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14179 case IX86_BUILTIN_STORERPD:
14180 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14182 case IX86_BUILTIN_CLRPD:
14183 target = gen_reg_rtx (V2DFmode);
14184 emit_insn (gen_sse_clrv2df (target));
14187 case IX86_BUILTIN_MFENCE:
14188 emit_insn (gen_sse2_mfence ());
14190 case IX86_BUILTIN_LFENCE:
14191 emit_insn (gen_sse2_lfence ());
14194 case IX86_BUILTIN_CLFLUSH:
14195 arg0 = TREE_VALUE (arglist);
14196 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14197 icode = CODE_FOR_sse2_clflush;
14198 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14199 op0 = copy_to_mode_reg (Pmode, op0);
14201 emit_insn (gen_sse2_clflush (op0));
14204 case IX86_BUILTIN_MOVNTPD:
14205 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14206 case IX86_BUILTIN_MOVNTDQ:
14207 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14208 case IX86_BUILTIN_MOVNTI:
14209 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14211 case IX86_BUILTIN_LOADDQA:
14212 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14213 case IX86_BUILTIN_LOADDQU:
14214 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14215 case IX86_BUILTIN_LOADD:
14216 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14218 case IX86_BUILTIN_STOREDQA:
14219 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14220 case IX86_BUILTIN_STOREDQU:
14221 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14222 case IX86_BUILTIN_STORED:
14223 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14225 case IX86_BUILTIN_MONITOR:
14226 arg0 = TREE_VALUE (arglist);
14227 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14228 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14229 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14230 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14231 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14233 op0 = copy_to_mode_reg (SImode, op0);
14235 op1 = copy_to_mode_reg (SImode, op1);
14237 op2 = copy_to_mode_reg (SImode, op2);
14238 emit_insn (gen_monitor (op0, op1, op2));
14241 case IX86_BUILTIN_MWAIT:
14242 arg0 = TREE_VALUE (arglist);
14243 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14244 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14245 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14247 op0 = copy_to_mode_reg (SImode, op0);
14249 op1 = copy_to_mode_reg (SImode, op1);
14250 emit_insn (gen_mwait (op0, op1));
14253 case IX86_BUILTIN_LOADDDUP:
14254 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14256 case IX86_BUILTIN_LDDQU:
14257 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14264 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14265 if (d->code == fcode)
14267 /* Compares are treated specially. */
14268 if (d->icode == CODE_FOR_maskcmpv4sf3
14269 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14270 || d->icode == CODE_FOR_maskncmpv4sf3
14271 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14272 || d->icode == CODE_FOR_maskcmpv2df3
14273 || d->icode == CODE_FOR_vmmaskcmpv2df3
14274 || d->icode == CODE_FOR_maskncmpv2df3
14275 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14276 return ix86_expand_sse_compare (d, arglist, target);
14278 return ix86_expand_binop_builtin (d->icode, arglist, target);
14281 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14282 if (d->code == fcode)
14283 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14285 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14286 if (d->code == fcode)
14287 return ix86_expand_sse_comi (d, arglist, target);
14289 /* @@@ Should really do something sensible here. */
14293 /* Store OPERAND to the memory after reload is completed. This means
14294 that we can't easily use assign_stack_local. */
14296 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14299 if (!reload_completed)
14301 if (TARGET_RED_ZONE)
14303 result = gen_rtx_MEM (mode,
14304 gen_rtx_PLUS (Pmode,
14306 GEN_INT (-RED_ZONE_SIZE)));
14307 emit_move_insn (result, operand);
14309 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14315 operand = gen_lowpart (DImode, operand);
14319 gen_rtx_SET (VOIDmode,
14320 gen_rtx_MEM (DImode,
14321 gen_rtx_PRE_DEC (DImode,
14322 stack_pointer_rtx)),
14328 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14337 split_di (&operand, 1, operands, operands + 1);
14339 gen_rtx_SET (VOIDmode,
14340 gen_rtx_MEM (SImode,
14341 gen_rtx_PRE_DEC (Pmode,
14342 stack_pointer_rtx)),
14345 gen_rtx_SET (VOIDmode,
14346 gen_rtx_MEM (SImode,
14347 gen_rtx_PRE_DEC (Pmode,
14348 stack_pointer_rtx)),
14353 /* It is better to store HImodes as SImodes. */
14354 if (!TARGET_PARTIAL_REG_STALL)
14355 operand = gen_lowpart (SImode, operand);
14359 gen_rtx_SET (VOIDmode,
14360 gen_rtx_MEM (GET_MODE (operand),
14361 gen_rtx_PRE_DEC (SImode,
14362 stack_pointer_rtx)),
14368 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14373 /* Free operand from the memory. */
14375 ix86_free_from_memory (enum machine_mode mode)
14377 if (!TARGET_RED_ZONE)
14381 if (mode == DImode || TARGET_64BIT)
14383 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14387 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14388 to pop or add instruction if registers are available. */
14389 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14390 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14395 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14396 QImode must go into class Q_REGS.
14397 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14398 movdf to do mem-to-mem moves through integer regs. */
14400 ix86_preferred_reload_class (rtx x, enum reg_class class)
14402 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14404 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14406 /* SSE can't load any constant directly yet. */
14407 if (SSE_CLASS_P (class))
14409 /* Floats can load 0 and 1. */
14410 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14412 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14413 if (MAYBE_SSE_CLASS_P (class))
14414 return (reg_class_subset_p (class, GENERAL_REGS)
14415 ? GENERAL_REGS : FLOAT_REGS);
14419 /* General regs can load everything. */
14420 if (reg_class_subset_p (class, GENERAL_REGS))
14421 return GENERAL_REGS;
14422 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14423 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14426 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14428 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14433 /* If we are copying between general and FP registers, we need a memory
14434 location. The same is true for SSE and MMX registers.
14436 The macro can't work reliably when one of the CLASSES is class containing
14437 registers from multiple units (SSE, MMX, integer). We avoid this by never
14438 combining those units in single alternative in the machine description.
14439 Ensure that this constraint holds to avoid unexpected surprises.
14441 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14442 enforce these sanity checks. */
14444 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14445 enum machine_mode mode, int strict)
14447 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14448 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14449 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14450 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14451 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14452 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14459 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14460 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14461 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14462 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14463 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14465 /* Return the cost of moving data from a register in class CLASS1 to
14466 one in class CLASS2.
14468 It is not required that the cost always equal 2 when FROM is the same as TO;
14469 on some machines it is expensive to move between registers if they are not
14470 general registers. */
14472 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14473 enum reg_class class2)
14475 /* In case we require secondary memory, compute cost of the store followed
14476 by load. In order to avoid bad register allocation choices, we need
14477 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14479 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14483 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14484 MEMORY_MOVE_COST (mode, class1, 1));
14485 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14486 MEMORY_MOVE_COST (mode, class2, 1));
14488 /* In case of copying from general_purpose_register we may emit multiple
14489 stores followed by single load causing memory size mismatch stall.
14490 Count this as arbitrarily high cost of 20. */
14491 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14494 /* In the case of FP/MMX moves, the registers actually overlap, and we
14495 have to switch modes in order to treat them differently. */
14496 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14497 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14503 /* Moves between SSE/MMX and integer unit are expensive. */
14504 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14505 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14506 return ix86_cost->mmxsse_to_integer;
14507 if (MAYBE_FLOAT_CLASS_P (class1))
14508 return ix86_cost->fp_move;
14509 if (MAYBE_SSE_CLASS_P (class1))
14510 return ix86_cost->sse_move;
14511 if (MAYBE_MMX_CLASS_P (class1))
14512 return ix86_cost->mmx_move;
14516 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14518 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14520 /* Flags and only flags can only hold CCmode values. */
14521 if (CC_REGNO_P (regno))
14522 return GET_MODE_CLASS (mode) == MODE_CC;
14523 if (GET_MODE_CLASS (mode) == MODE_CC
14524 || GET_MODE_CLASS (mode) == MODE_RANDOM
14525 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14527 if (FP_REGNO_P (regno))
14528 return VALID_FP_MODE_P (mode);
14529 if (SSE_REGNO_P (regno))
14530 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14531 if (MMX_REGNO_P (regno))
14533 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14534 /* We handle both integer and floats in the general purpose registers.
14535 In future we should be able to handle vector modes as well. */
14536 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14538 /* Take care for QImode values - they can be in non-QI regs, but then
14539 they do cause partial register stalls. */
14540 if (regno < 4 || mode != QImode || TARGET_64BIT)
14542 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14545 /* Return the cost of moving data of mode M between a
14546 register and memory. A value of 2 is the default; this cost is
14547 relative to those in `REGISTER_MOVE_COST'.
14549 If moving between registers and memory is more expensive than
14550 between two registers, you should define this macro to express the
14553 Model also increased moving costs of QImode registers in non
14557 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14559 if (FLOAT_CLASS_P (class))
14577 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14579 if (SSE_CLASS_P (class))
14582 switch (GET_MODE_SIZE (mode))
14596 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14598 if (MMX_CLASS_P (class))
14601 switch (GET_MODE_SIZE (mode))
14612 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14614 switch (GET_MODE_SIZE (mode))
14618 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14619 : ix86_cost->movzbl_load);
14621 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14622 : ix86_cost->int_store[0] + 4);
14625 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14627 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14628 if (mode == TFmode)
14630 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14631 * (((int) GET_MODE_SIZE (mode)
14632 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14636 /* Compute a (partial) cost for rtx X. Return true if the complete
14637 cost has been computed, and false if subexpressions should be
14638 scanned. In either case, *TOTAL contains the cost result. */
14641 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14643 enum machine_mode mode = GET_MODE (x);
14651 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14653 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14655 else if (flag_pic && SYMBOLIC_CONST (x)
14657 || (!GET_CODE (x) != LABEL_REF
14658 && (GET_CODE (x) != SYMBOL_REF
14659 || !SYMBOL_REF_LOCAL_P (x)))))
14666 if (mode == VOIDmode)
14669 switch (standard_80387_constant_p (x))
14674 default: /* Other constants */
14679 /* Start with (MEM (SYMBOL_REF)), since that's where
14680 it'll probably end up. Add a penalty for size. */
14681 *total = (COSTS_N_INSNS (1)
14682 + (flag_pic != 0 && !TARGET_64BIT)
14683 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14689 /* The zero extensions is often completely free on x86_64, so make
14690 it as cheap as possible. */
14691 if (TARGET_64BIT && mode == DImode
14692 && GET_MODE (XEXP (x, 0)) == SImode)
14694 else if (TARGET_ZERO_EXTEND_WITH_AND)
14695 *total = COSTS_N_INSNS (ix86_cost->add);
14697 *total = COSTS_N_INSNS (ix86_cost->movzx);
14701 *total = COSTS_N_INSNS (ix86_cost->movsx);
14705 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14706 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14708 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14711 *total = COSTS_N_INSNS (ix86_cost->add);
14714 if ((value == 2 || value == 3)
14715 && !TARGET_DECOMPOSE_LEA
14716 && ix86_cost->lea <= ix86_cost->shift_const)
14718 *total = COSTS_N_INSNS (ix86_cost->lea);
14728 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14730 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14732 if (INTVAL (XEXP (x, 1)) > 32)
14733 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14735 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14739 if (GET_CODE (XEXP (x, 1)) == AND)
14740 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14742 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14747 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14748 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14750 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14755 if (FLOAT_MODE_P (mode))
14756 *total = COSTS_N_INSNS (ix86_cost->fmul);
14757 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14759 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14762 for (nbits = 0; value != 0; value >>= 1)
14765 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14766 + nbits * ix86_cost->mult_bit);
14770 /* This is arbitrary */
14771 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14772 + 7 * ix86_cost->mult_bit);
14780 if (FLOAT_MODE_P (mode))
14781 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14783 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14787 if (FLOAT_MODE_P (mode))
14788 *total = COSTS_N_INSNS (ix86_cost->fadd);
14789 else if (!TARGET_DECOMPOSE_LEA
14790 && GET_MODE_CLASS (mode) == MODE_INT
14791 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14793 if (GET_CODE (XEXP (x, 0)) == PLUS
14794 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14795 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14796 && CONSTANT_P (XEXP (x, 1)))
14798 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14799 if (val == 2 || val == 4 || val == 8)
14801 *total = COSTS_N_INSNS (ix86_cost->lea);
14802 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14803 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14805 *total += rtx_cost (XEXP (x, 1), outer_code);
14809 else if (GET_CODE (XEXP (x, 0)) == MULT
14810 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14812 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14813 if (val == 2 || val == 4 || val == 8)
14815 *total = COSTS_N_INSNS (ix86_cost->lea);
14816 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14817 *total += rtx_cost (XEXP (x, 1), outer_code);
14821 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14823 *total = COSTS_N_INSNS (ix86_cost->lea);
14824 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14825 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14826 *total += rtx_cost (XEXP (x, 1), outer_code);
14833 if (FLOAT_MODE_P (mode))
14835 *total = COSTS_N_INSNS (ix86_cost->fadd);
14843 if (!TARGET_64BIT && mode == DImode)
14845 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14846 + (rtx_cost (XEXP (x, 0), outer_code)
14847 << (GET_MODE (XEXP (x, 0)) != DImode))
14848 + (rtx_cost (XEXP (x, 1), outer_code)
14849 << (GET_MODE (XEXP (x, 1)) != DImode)));
14855 if (FLOAT_MODE_P (mode))
14857 *total = COSTS_N_INSNS (ix86_cost->fchs);
14863 if (!TARGET_64BIT && mode == DImode)
14864 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14866 *total = COSTS_N_INSNS (ix86_cost->add);
14870 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14875 if (FLOAT_MODE_P (mode))
14876 *total = COSTS_N_INSNS (ix86_cost->fabs);
14880 if (FLOAT_MODE_P (mode))
14881 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14885 if (XINT (x, 1) == UNSPEC_TP)
14894 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14896 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14899 fputs ("\tpushl $", asm_out_file);
14900 assemble_name (asm_out_file, XSTR (symbol, 0));
14901 fputc ('\n', asm_out_file);
14907 static int current_machopic_label_num;
14909 /* Given a symbol name and its associated stub, write out the
14910 definition of the stub. */
14913 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14915 unsigned int length;
14916 char *binder_name, *symbol_name, lazy_ptr_name[32];
14917 int label = ++current_machopic_label_num;
14919 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14920 symb = (*targetm.strip_name_encoding) (symb);
14922 length = strlen (stub);
14923 binder_name = alloca (length + 32);
14924 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14926 length = strlen (symb);
14927 symbol_name = alloca (length + 32);
14928 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14930 sprintf (lazy_ptr_name, "L%d$lz", label);
14933 machopic_picsymbol_stub_section ();
14935 machopic_symbol_stub_section ();
14937 fprintf (file, "%s:\n", stub);
14938 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14942 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14943 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14944 fprintf (file, "\tjmp %%edx\n");
14947 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14949 fprintf (file, "%s:\n", binder_name);
14953 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14954 fprintf (file, "\tpushl %%eax\n");
14957 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14959 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14961 machopic_lazy_symbol_ptr_section ();
14962 fprintf (file, "%s:\n", lazy_ptr_name);
14963 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14964 fprintf (file, "\t.long %s\n", binder_name);
14966 #endif /* TARGET_MACHO */
14968 /* Order the registers for register allocator. */
14971 x86_order_regs_for_local_alloc (void)
14976 /* First allocate the local general purpose registers. */
14977 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14978 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14979 reg_alloc_order [pos++] = i;
14981 /* Global general purpose registers. */
14982 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14983 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14984 reg_alloc_order [pos++] = i;
14986 /* x87 registers come first in case we are doing FP math
14988 if (!TARGET_SSE_MATH)
14989 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14990 reg_alloc_order [pos++] = i;
14992 /* SSE registers. */
14993 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14994 reg_alloc_order [pos++] = i;
14995 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14996 reg_alloc_order [pos++] = i;
14998 /* x87 registers. */
14999 if (TARGET_SSE_MATH)
15000 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15001 reg_alloc_order [pos++] = i;
15003 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15004 reg_alloc_order [pos++] = i;
15006 /* Initialize the rest of array as we do not allocate some registers
15008 while (pos < FIRST_PSEUDO_REGISTER)
15009 reg_alloc_order [pos++] = 0;
15012 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15013 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15016 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15017 struct attribute_spec.handler. */
15019 ix86_handle_struct_attribute (tree *node, tree name,
15020 tree args ATTRIBUTE_UNUSED,
15021 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15024 if (DECL_P (*node))
15026 if (TREE_CODE (*node) == TYPE_DECL)
15027 type = &TREE_TYPE (*node);
15032 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15033 || TREE_CODE (*type) == UNION_TYPE)))
15035 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15036 *no_add_attrs = true;
15039 else if ((is_attribute_p ("ms_struct", name)
15040 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15041 || ((is_attribute_p ("gcc_struct", name)
15042 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15044 warning ("`%s' incompatible attribute ignored",
15045 IDENTIFIER_POINTER (name));
15046 *no_add_attrs = true;
15053 ix86_ms_bitfield_layout_p (tree record_type)
15055 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15056 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15057 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15060 /* Returns an expression indicating where the this parameter is
15061 located on entry to the FUNCTION. */
15064 x86_this_parameter (tree function)
15066 tree type = TREE_TYPE (function);
15070 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15071 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15074 if (ix86_fntype_regparm (type) > 0)
15078 parm = TYPE_ARG_TYPES (type);
15079 /* Figure out whether or not the function has a variable number of
15081 for (; parm; parm = TREE_CHAIN (parm))
15082 if (TREE_VALUE (parm) == void_type_node)
15084 /* If not, the this parameter is in %eax. */
15086 return gen_rtx_REG (SImode, 0);
15089 if (aggregate_value_p (TREE_TYPE (type)))
15090 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15092 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15095 /* Determine whether x86_output_mi_thunk can succeed. */
15098 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15099 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15100 HOST_WIDE_INT vcall_offset, tree function)
15102 /* 64-bit can handle anything. */
15106 /* For 32-bit, everything's fine if we have one free register. */
15107 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15110 /* Need a free register for vcall_offset. */
15114 /* Need a free register for GOT references. */
15115 if (flag_pic && !(*targetm.binds_local_p) (function))
15118 /* Otherwise ok. */
15122 /* Output the assembler code for a thunk function. THUNK_DECL is the
15123 declaration for the thunk function itself, FUNCTION is the decl for
15124 the target function. DELTA is an immediate constant offset to be
15125 added to THIS. If VCALL_OFFSET is nonzero, the word at
15126 *(*this + vcall_offset) should be added to THIS. */
15129 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15130 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15131 HOST_WIDE_INT vcall_offset, tree function)
15134 rtx this = x86_this_parameter (function);
15137 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15138 pull it in now and let DELTA benefit. */
15141 else if (vcall_offset)
15143 /* Put the this parameter into %eax. */
15145 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15146 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15149 this_reg = NULL_RTX;
15151 /* Adjust the this parameter by a fixed constant. */
15154 xops[0] = GEN_INT (delta);
15155 xops[1] = this_reg ? this_reg : this;
15158 if (!x86_64_general_operand (xops[0], DImode))
15160 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15162 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15166 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15169 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15172 /* Adjust the this parameter by a value stored in the vtable. */
15176 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15178 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15180 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15183 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15185 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15187 /* Adjust the this parameter. */
15188 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15189 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15191 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15192 xops[0] = GEN_INT (vcall_offset);
15194 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15195 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15197 xops[1] = this_reg;
15199 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15201 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15204 /* If necessary, drop THIS back to its stack slot. */
15205 if (this_reg && this_reg != this)
15207 xops[0] = this_reg;
15209 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15212 xops[0] = XEXP (DECL_RTL (function), 0);
15215 if (!flag_pic || (*targetm.binds_local_p) (function))
15216 output_asm_insn ("jmp\t%P0", xops);
15219 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15220 tmp = gen_rtx_CONST (Pmode, tmp);
15221 tmp = gen_rtx_MEM (QImode, tmp);
15223 output_asm_insn ("jmp\t%A0", xops);
15228 if (!flag_pic || (*targetm.binds_local_p) (function))
15229 output_asm_insn ("jmp\t%P0", xops);
15234 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15235 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15236 tmp = gen_rtx_MEM (QImode, tmp);
15238 output_asm_insn ("jmp\t%0", xops);
15241 #endif /* TARGET_MACHO */
15243 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15244 output_set_got (tmp);
15247 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15248 output_asm_insn ("jmp\t{*}%1", xops);
15254 x86_file_start (void)
15256 default_file_start ();
15257 if (X86_FILE_START_VERSION_DIRECTIVE)
15258 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15259 if (X86_FILE_START_FLTUSED)
15260 fputs ("\t.global\t__fltused\n", asm_out_file);
15261 if (ix86_asm_dialect == ASM_INTEL)
15262 fputs ("\t.intel_syntax\n", asm_out_file);
15266 x86_field_alignment (tree field, int computed)
15268 enum machine_mode mode;
15269 tree type = TREE_TYPE (field);
15271 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15273 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15274 ? get_inner_array_type (type) : type);
15275 if (mode == DFmode || mode == DCmode
15276 || GET_MODE_CLASS (mode) == MODE_INT
15277 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15278 return MIN (32, computed);
15282 /* Output assembler code to FILE to increment profiler label # LABELNO
15283 for profiling a function entry. */
15285 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15290 #ifndef NO_PROFILE_COUNTERS
15291 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15293 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15297 #ifndef NO_PROFILE_COUNTERS
15298 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15300 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15304 #ifndef NO_PROFILE_COUNTERS
15305 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15306 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15308 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15312 #ifndef NO_PROFILE_COUNTERS
15313 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15314 PROFILE_COUNT_REGISTER);
15316 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15320 /* We don't have exact information about the insn sizes, but we may assume
15321 quite safely that we are informed about all 1 byte insns and memory
15322 address sizes. This is enough to eliminate unnecessary padding in
15326 min_insn_size (rtx insn)
15330 if (!INSN_P (insn) || !active_insn_p (insn))
15333 /* Discard alignments we've emit and jump instructions. */
15334 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15335 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15337 if (GET_CODE (insn) == JUMP_INSN
15338 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15339 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15342 /* Important case - calls are always 5 bytes.
15343 It is common to have many calls in the row. */
15344 if (GET_CODE (insn) == CALL_INSN
15345 && symbolic_reference_mentioned_p (PATTERN (insn))
15346 && !SIBLING_CALL_P (insn))
15348 if (get_attr_length (insn) <= 1)
15351 /* For normal instructions we may rely on the sizes of addresses
15352 and the presence of symbol to require 4 bytes of encoding.
15353 This is not the case for jumps where references are PC relative. */
15354 if (GET_CODE (insn) != JUMP_INSN)
15356 l = get_attr_length_address (insn);
15357 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15366 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15370 k8_avoid_jump_misspredicts (void)
15372 rtx insn, start = get_insns ();
15373 int nbytes = 0, njumps = 0;
15376 /* Look for all minimal intervals of instructions containing 4 jumps.
15377 The intervals are bounded by START and INSN. NBYTES is the total
15378 size of instructions in the interval including INSN and not including
15379 START. When the NBYTES is smaller than 16 bytes, it is possible
15380 that the end of START and INSN ends up in the same 16byte page.
15382 The smallest offset in the page INSN can start is the case where START
15383 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15384 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15386 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15389 nbytes += min_insn_size (insn);
15391 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15392 INSN_UID (insn), min_insn_size (insn));
15393 if ((GET_CODE (insn) == JUMP_INSN
15394 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15395 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15396 || GET_CODE (insn) == CALL_INSN)
15403 start = NEXT_INSN (start);
15404 if ((GET_CODE (start) == JUMP_INSN
15405 && GET_CODE (PATTERN (start)) != ADDR_VEC
15406 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15407 || GET_CODE (start) == CALL_INSN)
15408 njumps--, isjump = 1;
15411 nbytes -= min_insn_size (start);
15416 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15417 INSN_UID (start), INSN_UID (insn), nbytes);
15419 if (njumps == 3 && isjump && nbytes < 16)
15421 int padsize = 15 - nbytes + min_insn_size (insn);
15424 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15425 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15430 /* Implement machine specific optimizations.
15431 At the moment we implement single transformation: AMD Athlon works faster
15432 when RET is not destination of conditional jump or directly preceded
15433 by other jump instruction. We avoid the penalty by inserting NOP just
15434 before the RET instructions in such cases. */
15440 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15442 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15444 basic_block bb = e->src;
15447 bool replace = false;
15449 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15450 || !maybe_hot_bb_p (bb))
15452 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15453 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15455 if (prev && GET_CODE (prev) == CODE_LABEL)
15458 for (e = bb->pred; e; e = e->pred_next)
15459 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15460 && !(e->flags & EDGE_FALLTHRU))
15465 prev = prev_active_insn (ret);
15467 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15468 || GET_CODE (prev) == CALL_INSN))
15470 /* Empty functions get branch mispredict even when the jump destination
15471 is not visible to us. */
15472 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15477 emit_insn_before (gen_return_internal_long (), ret);
15481 k8_avoid_jump_misspredicts ();
15484 /* Return nonzero when QImode register that must be represented via REX prefix
15487 x86_extended_QIreg_mentioned_p (rtx insn)
15490 extract_insn_cached (insn);
15491 for (i = 0; i < recog_data.n_operands; i++)
15492 if (REG_P (recog_data.operand[i])
15493 && REGNO (recog_data.operand[i]) >= 4)
15498 /* Return nonzero when P points to register encoded via REX prefix.
15499 Called via for_each_rtx. */
15501 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15503 unsigned int regno;
15506 regno = REGNO (*p);
15507 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15510 /* Return true when INSN mentions register that must be encoded using REX
15513 x86_extended_reg_mentioned_p (rtx insn)
15515 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15518 /* Generate an unsigned DImode to FP conversion. This is the same code
15519 optabs would emit if we didn't have TFmode patterns. */
15522 x86_emit_floatuns (rtx operands[2])
15524 rtx neglab, donelab, i0, i1, f0, in, out;
15525 enum machine_mode mode;
15528 in = force_reg (DImode, operands[1]);
15529 mode = GET_MODE (out);
15530 neglab = gen_label_rtx ();
15531 donelab = gen_label_rtx ();
15532 i1 = gen_reg_rtx (Pmode);
15533 f0 = gen_reg_rtx (mode);
15535 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15537 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15538 emit_jump_insn (gen_jump (donelab));
15541 emit_label (neglab);
15543 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15544 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15545 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15546 expand_float (f0, i0, 0);
15547 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15549 emit_label (donelab);
15552 /* Return if we do not know how to pass TYPE solely in registers. */
15554 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15556 if (default_must_pass_in_stack (mode, type))
15558 return (!TARGET_64BIT && type && mode == TImode);
15561 #include "gt-i386.h"