1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Processor costs (relative to an add) */
55 struct processor_costs size_cost = { /* costs for tunning for size */
56 2, /* cost of an add instruction */
57 3, /* cost of a lea instruction */
58 2, /* variable shift costs */
59 3, /* constant shift costs */
60 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
61 0, /* cost of multiply per each bit set */
62 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
63 3, /* cost of movsx */
64 3, /* cost of movzx */
67 2, /* cost for loading QImode using movzbl */
68 {2, 2, 2}, /* cost of loading integer registers
69 in QImode, HImode and SImode.
70 Relative to reg-reg move (2). */
71 {2, 2, 2}, /* cost of storing integer registers */
72 2, /* cost of reg,reg fld/fst */
73 {2, 2, 2}, /* cost of loading fp registers
74 in SFmode, DFmode and XFmode */
75 {2, 2, 2}, /* cost of loading integer registers */
76 3, /* cost of moving MMX register */
77 {3, 3}, /* cost of loading MMX registers
78 in SImode and DImode */
79 {3, 3}, /* cost of storing MMX registers
80 in SImode and DImode */
81 3, /* cost of moving SSE register */
82 {3, 3, 3}, /* cost of loading SSE registers
83 in SImode, DImode and TImode */
84 {3, 3, 3}, /* cost of storing SSE registers
85 in SImode, DImode and TImode */
86 3, /* MMX or SSE register to integer */
87 0, /* size of prefetch block */
88 0, /* number of parallel prefetches */
90 2, /* cost of FADD and FSUB insns. */
91 2, /* cost of FMUL instruction. */
92 2, /* cost of FDIV instruction. */
93 2, /* cost of FABS instruction. */
94 2, /* cost of FCHS instruction. */
95 2, /* cost of FSQRT instruction. */
98 /* Processor costs (relative to an add) */
100 struct processor_costs i386_cost = { /* 386 specific costs */
101 1, /* cost of an add instruction */
102 1, /* cost of a lea instruction */
103 3, /* variable shift costs */
104 2, /* constant shift costs */
105 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
106 1, /* cost of multiply per each bit set */
107 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
108 3, /* cost of movsx */
109 2, /* cost of movzx */
110 15, /* "large" insn */
112 4, /* cost for loading QImode using movzbl */
113 {2, 4, 2}, /* cost of loading integer registers
114 in QImode, HImode and SImode.
115 Relative to reg-reg move (2). */
116 {2, 4, 2}, /* cost of storing integer registers */
117 2, /* cost of reg,reg fld/fst */
118 {8, 8, 8}, /* cost of loading fp registers
119 in SFmode, DFmode and XFmode */
120 {8, 8, 8}, /* cost of loading integer registers */
121 2, /* cost of moving MMX register */
122 {4, 8}, /* cost of loading MMX registers
123 in SImode and DImode */
124 {4, 8}, /* cost of storing MMX registers
125 in SImode and DImode */
126 2, /* cost of moving SSE register */
127 {4, 8, 16}, /* cost of loading SSE registers
128 in SImode, DImode and TImode */
129 {4, 8, 16}, /* cost of storing SSE registers
130 in SImode, DImode and TImode */
131 3, /* MMX or SSE register to integer */
132 0, /* size of prefetch block */
133 0, /* number of parallel prefetches */
135 23, /* cost of FADD and FSUB insns. */
136 27, /* cost of FMUL instruction. */
137 88, /* cost of FDIV instruction. */
138 22, /* cost of FABS instruction. */
139 24, /* cost of FCHS instruction. */
140 122, /* cost of FSQRT instruction. */
144 struct processor_costs i486_cost = { /* 486 specific costs */
145 1, /* cost of an add instruction */
146 1, /* cost of a lea instruction */
147 3, /* variable shift costs */
148 2, /* constant shift costs */
149 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
150 1, /* cost of multiply per each bit set */
151 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
152 3, /* cost of movsx */
153 2, /* cost of movzx */
154 15, /* "large" insn */
156 4, /* cost for loading QImode using movzbl */
157 {2, 4, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 4, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {8, 8, 8}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {8, 8, 8}, /* cost of loading integer registers */
165 2, /* cost of moving MMX register */
166 {4, 8}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {4, 8}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {4, 8, 16}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {4, 8, 16}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3, /* MMX or SSE register to integer */
176 0, /* size of prefetch block */
177 0, /* number of parallel prefetches */
179 8, /* cost of FADD and FSUB insns. */
180 16, /* cost of FMUL instruction. */
181 73, /* cost of FDIV instruction. */
182 3, /* cost of FABS instruction. */
183 3, /* cost of FCHS instruction. */
184 83, /* cost of FSQRT instruction. */
188 struct processor_costs pentium_cost = {
189 1, /* cost of an add instruction */
190 1, /* cost of a lea instruction */
191 4, /* variable shift costs */
192 1, /* constant shift costs */
193 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
194 0, /* cost of multiply per each bit set */
195 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
196 3, /* cost of movsx */
197 2, /* cost of movzx */
198 8, /* "large" insn */
200 6, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {2, 2, 6}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {4, 4, 6}, /* cost of loading integer registers */
209 8, /* cost of moving MMX register */
210 {8, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {8, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
223 3, /* cost of FADD and FSUB insns. */
224 3, /* cost of FMUL instruction. */
225 39, /* cost of FDIV instruction. */
226 1, /* cost of FABS instruction. */
227 1, /* cost of FCHS instruction. */
228 70, /* cost of FSQRT instruction. */
232 struct processor_costs pentiumpro_cost = {
233 1, /* cost of an add instruction */
234 1, /* cost of a lea instruction */
235 1, /* variable shift costs */
236 1, /* constant shift costs */
237 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
238 0, /* cost of multiply per each bit set */
239 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
240 1, /* cost of movsx */
241 1, /* cost of movzx */
242 8, /* "large" insn */
244 2, /* cost for loading QImode using movzbl */
245 {4, 4, 4}, /* cost of loading integer registers
246 in QImode, HImode and SImode.
247 Relative to reg-reg move (2). */
248 {2, 2, 2}, /* cost of storing integer registers */
249 2, /* cost of reg,reg fld/fst */
250 {2, 2, 6}, /* cost of loading fp registers
251 in SFmode, DFmode and XFmode */
252 {4, 4, 6}, /* cost of loading integer registers */
253 2, /* cost of moving MMX register */
254 {2, 2}, /* cost of loading MMX registers
255 in SImode and DImode */
256 {2, 2}, /* cost of storing MMX registers
257 in SImode and DImode */
258 2, /* cost of moving SSE register */
259 {2, 2, 8}, /* cost of loading SSE registers
260 in SImode, DImode and TImode */
261 {2, 2, 8}, /* cost of storing SSE registers
262 in SImode, DImode and TImode */
263 3, /* MMX or SSE register to integer */
264 32, /* size of prefetch block */
265 6, /* number of parallel prefetches */
267 3, /* cost of FADD and FSUB insns. */
268 5, /* cost of FMUL instruction. */
269 56, /* cost of FDIV instruction. */
270 2, /* cost of FABS instruction. */
271 2, /* cost of FCHS instruction. */
272 56, /* cost of FSQRT instruction. */
276 struct processor_costs k6_cost = {
277 1, /* cost of an add instruction */
278 2, /* cost of a lea instruction */
279 1, /* variable shift costs */
280 1, /* constant shift costs */
281 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
282 0, /* cost of multiply per each bit set */
283 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
284 2, /* cost of movsx */
285 2, /* cost of movzx */
286 8, /* "large" insn */
288 3, /* cost for loading QImode using movzbl */
289 {4, 5, 4}, /* cost of loading integer registers
290 in QImode, HImode and SImode.
291 Relative to reg-reg move (2). */
292 {2, 3, 2}, /* cost of storing integer registers */
293 4, /* cost of reg,reg fld/fst */
294 {6, 6, 6}, /* cost of loading fp registers
295 in SFmode, DFmode and XFmode */
296 {4, 4, 4}, /* cost of loading integer registers */
297 2, /* cost of moving MMX register */
298 {2, 2}, /* cost of loading MMX registers
299 in SImode and DImode */
300 {2, 2}, /* cost of storing MMX registers
301 in SImode and DImode */
302 2, /* cost of moving SSE register */
303 {2, 2, 8}, /* cost of loading SSE registers
304 in SImode, DImode and TImode */
305 {2, 2, 8}, /* cost of storing SSE registers
306 in SImode, DImode and TImode */
307 6, /* MMX or SSE register to integer */
308 32, /* size of prefetch block */
309 1, /* number of parallel prefetches */
311 2, /* cost of FADD and FSUB insns. */
312 2, /* cost of FMUL instruction. */
313 56, /* cost of FDIV instruction. */
314 2, /* cost of FABS instruction. */
315 2, /* cost of FCHS instruction. */
316 56, /* cost of FSQRT instruction. */
320 struct processor_costs athlon_cost = {
321 1, /* cost of an add instruction */
322 2, /* cost of a lea instruction */
323 1, /* variable shift costs */
324 1, /* constant shift costs */
325 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
326 0, /* cost of multiply per each bit set */
327 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
328 1, /* cost of movsx */
329 1, /* cost of movzx */
330 8, /* "large" insn */
332 4, /* cost for loading QImode using movzbl */
333 {3, 4, 3}, /* cost of loading integer registers
334 in QImode, HImode and SImode.
335 Relative to reg-reg move (2). */
336 {3, 4, 3}, /* cost of storing integer registers */
337 4, /* cost of reg,reg fld/fst */
338 {4, 4, 12}, /* cost of loading fp registers
339 in SFmode, DFmode and XFmode */
340 {6, 6, 8}, /* cost of loading integer registers */
341 2, /* cost of moving MMX register */
342 {4, 4}, /* cost of loading MMX registers
343 in SImode and DImode */
344 {4, 4}, /* cost of storing MMX registers
345 in SImode and DImode */
346 2, /* cost of moving SSE register */
347 {4, 4, 6}, /* cost of loading SSE registers
348 in SImode, DImode and TImode */
349 {4, 4, 5}, /* cost of storing SSE registers
350 in SImode, DImode and TImode */
351 5, /* MMX or SSE register to integer */
352 64, /* size of prefetch block */
353 6, /* number of parallel prefetches */
355 4, /* cost of FADD and FSUB insns. */
356 4, /* cost of FMUL instruction. */
357 24, /* cost of FDIV instruction. */
358 2, /* cost of FABS instruction. */
359 2, /* cost of FCHS instruction. */
360 35, /* cost of FSQRT instruction. */
364 struct processor_costs k8_cost = {
365 1, /* cost of an add instruction */
366 2, /* cost of a lea instruction */
367 1, /* variable shift costs */
368 1, /* constant shift costs */
369 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
370 0, /* cost of multiply per each bit set */
371 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
372 1, /* cost of movsx */
373 1, /* cost of movzx */
374 8, /* "large" insn */
376 4, /* cost for loading QImode using movzbl */
377 {3, 4, 3}, /* cost of loading integer registers
378 in QImode, HImode and SImode.
379 Relative to reg-reg move (2). */
380 {3, 4, 3}, /* cost of storing integer registers */
381 4, /* cost of reg,reg fld/fst */
382 {4, 4, 12}, /* cost of loading fp registers
383 in SFmode, DFmode and XFmode */
384 {6, 6, 8}, /* cost of loading integer registers */
385 2, /* cost of moving MMX register */
386 {3, 3}, /* cost of loading MMX registers
387 in SImode and DImode */
388 {4, 4}, /* cost of storing MMX registers
389 in SImode and DImode */
390 2, /* cost of moving SSE register */
391 {4, 3, 6}, /* cost of loading SSE registers
392 in SImode, DImode and TImode */
393 {4, 4, 5}, /* cost of storing SSE registers
394 in SImode, DImode and TImode */
395 5, /* MMX or SSE register to integer */
396 64, /* size of prefetch block */
397 6, /* number of parallel prefetches */
399 4, /* cost of FADD and FSUB insns. */
400 4, /* cost of FMUL instruction. */
401 19, /* cost of FDIV instruction. */
402 2, /* cost of FABS instruction. */
403 2, /* cost of FCHS instruction. */
404 35, /* cost of FSQRT instruction. */
408 struct processor_costs pentium4_cost = {
409 1, /* cost of an add instruction */
410 1, /* cost of a lea instruction */
411 4, /* variable shift costs */
412 4, /* constant shift costs */
413 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
414 0, /* cost of multiply per each bit set */
415 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
416 1, /* cost of movsx */
417 1, /* cost of movzx */
418 16, /* "large" insn */
420 2, /* cost for loading QImode using movzbl */
421 {4, 5, 4}, /* cost of loading integer registers
422 in QImode, HImode and SImode.
423 Relative to reg-reg move (2). */
424 {2, 3, 2}, /* cost of storing integer registers */
425 2, /* cost of reg,reg fld/fst */
426 {2, 2, 6}, /* cost of loading fp registers
427 in SFmode, DFmode and XFmode */
428 {4, 4, 6}, /* cost of loading integer registers */
429 2, /* cost of moving MMX register */
430 {2, 2}, /* cost of loading MMX registers
431 in SImode and DImode */
432 {2, 2}, /* cost of storing MMX registers
433 in SImode and DImode */
434 12, /* cost of moving SSE register */
435 {12, 12, 12}, /* cost of loading SSE registers
436 in SImode, DImode and TImode */
437 {2, 2, 8}, /* cost of storing SSE registers
438 in SImode, DImode and TImode */
439 10, /* MMX or SSE register to integer */
440 64, /* size of prefetch block */
441 6, /* number of parallel prefetches */
443 5, /* cost of FADD and FSUB insns. */
444 7, /* cost of FMUL instruction. */
445 43, /* cost of FDIV instruction. */
446 2, /* cost of FABS instruction. */
447 2, /* cost of FCHS instruction. */
448 43, /* cost of FSQRT instruction. */
451 const struct processor_costs *ix86_cost = &pentium_cost;
453 /* Processor feature/optimization bitmasks. */
454 #define m_386 (1<<PROCESSOR_I386)
455 #define m_486 (1<<PROCESSOR_I486)
456 #define m_PENT (1<<PROCESSOR_PENTIUM)
457 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
458 #define m_K6 (1<<PROCESSOR_K6)
459 #define m_ATHLON (1<<PROCESSOR_ATHLON)
460 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
461 #define m_K8 (1<<PROCESSOR_K8)
462 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
464 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
465 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
466 const int x86_zero_extend_with_and = m_486 | m_PENT;
467 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
468 const int x86_double_with_add = ~m_386;
469 const int x86_use_bit_test = m_386;
470 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
471 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
472 const int x86_3dnow_a = m_ATHLON_K8;
473 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_branch_hints = m_PENT4;
475 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
476 const int x86_partial_reg_stall = m_PPRO;
477 const int x86_use_loop = m_K6;
478 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
479 const int x86_use_mov0 = m_K6;
480 const int x86_use_cltd = ~(m_PENT | m_K6);
481 const int x86_read_modify_write = ~m_PENT;
482 const int x86_read_modify = ~(m_PENT | m_PPRO);
483 const int x86_split_long_moves = m_PPRO;
484 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
485 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
486 const int x86_single_stringop = m_386 | m_PENT4;
487 const int x86_qimode_math = ~(0);
488 const int x86_promote_qi_regs = 0;
489 const int x86_himode_math = ~(m_PPRO);
490 const int x86_promote_hi_regs = m_PPRO;
491 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
492 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
493 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
494 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
495 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
496 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
497 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
498 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
499 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
500 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
501 const int x86_decompose_lea = m_PENT4;
502 const int x86_shift1 = ~m_486;
503 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
504 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
505 /* Set for machines where the type and dependencies are resolved on SSE register
506 parts instead of whole registers, so we may maintain just lower part of
507 scalar values in proper format leaving the upper part undefined. */
508 const int x86_sse_partial_regs = m_ATHLON_K8;
509 /* Athlon optimizes partial-register FPS special case, thus avoiding the
510 need for extra instructions beforehand */
511 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
512 const int x86_sse_typeless_stores = m_ATHLON_K8;
513 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
514 const int x86_use_ffreep = m_ATHLON_K8;
515 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
517 /* In case the average insn count for single function invocation is
518 lower than this constant, emit fast (but longer) prologue and
520 #define FAST_PROLOGUE_INSN_COUNT 20
522 /* Set by prologue expander and used by epilogue expander to determine
524 static int use_fast_prologue_epilogue;
526 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
527 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
528 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
529 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
531 /* Array of the smallest class containing reg number REGNO, indexed by
532 REGNO. Used by REGNO_REG_CLASS in i386.h. */
534 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
537 AREG, DREG, CREG, BREG,
539 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
541 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
542 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
545 /* flags, fpsr, dirflag, frame */
546 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
547 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
549 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
551 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
552 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
553 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
557 /* The "default" register map used in 32bit mode. */
559 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
561 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
562 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
563 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
564 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
565 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
566 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
567 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
570 static int const x86_64_int_parameter_registers[6] =
572 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
573 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
576 static int const x86_64_int_return_registers[4] =
578 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
581 /* The "default" register map used in 64bit mode. */
582 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
584 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
585 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
586 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
587 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
588 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
589 8,9,10,11,12,13,14,15, /* extended integer registers */
590 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
593 /* Define the register numbers to be used in Dwarf debugging information.
594 The SVR4 reference port C compiler uses the following register numbers
595 in its Dwarf output code:
596 0 for %eax (gcc regno = 0)
597 1 for %ecx (gcc regno = 2)
598 2 for %edx (gcc regno = 1)
599 3 for %ebx (gcc regno = 3)
600 4 for %esp (gcc regno = 7)
601 5 for %ebp (gcc regno = 6)
602 6 for %esi (gcc regno = 4)
603 7 for %edi (gcc regno = 5)
604 The following three DWARF register numbers are never generated by
605 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
606 believes these numbers have these meanings.
607 8 for %eip (no gcc equivalent)
608 9 for %eflags (gcc regno = 17)
609 10 for %trapno (no gcc equivalent)
610 It is not at all clear how we should number the FP stack registers
611 for the x86 architecture. If the version of SDB on x86/svr4 were
612 a bit less brain dead with respect to floating-point then we would
613 have a precedent to follow with respect to DWARF register numbers
614 for x86 FP registers, but the SDB on x86/svr4 is so completely
615 broken with respect to FP registers that it is hardly worth thinking
616 of it as something to strive for compatibility with.
617 The version of x86/svr4 SDB I have at the moment does (partially)
618 seem to believe that DWARF register number 11 is associated with
619 the x86 register %st(0), but that's about all. Higher DWARF
620 register numbers don't seem to be associated with anything in
621 particular, and even for DWARF regno 11, SDB only seems to under-
622 stand that it should say that a variable lives in %st(0) (when
623 asked via an `=' command) if we said it was in DWARF regno 11,
624 but SDB still prints garbage when asked for the value of the
625 variable in question (via a `/' command).
626 (Also note that the labels SDB prints for various FP stack regs
627 when doing an `x' command are all wrong.)
628 Note that these problems generally don't affect the native SVR4
629 C compiler because it doesn't allow the use of -O with -g and
630 because when it is *not* optimizing, it allocates a memory
631 location for each floating-point variable, and the memory
632 location is what gets described in the DWARF AT_location
633 attribute for the variable in question.
634 Regardless of the severe mental illness of the x86/svr4 SDB, we
635 do something sensible here and we use the following DWARF
636 register numbers. Note that these are all stack-top-relative
638 11 for %st(0) (gcc regno = 8)
639 12 for %st(1) (gcc regno = 9)
640 13 for %st(2) (gcc regno = 10)
641 14 for %st(3) (gcc regno = 11)
642 15 for %st(4) (gcc regno = 12)
643 16 for %st(5) (gcc regno = 13)
644 17 for %st(6) (gcc regno = 14)
645 18 for %st(7) (gcc regno = 15)
647 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
649 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
650 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
651 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
652 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
653 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
654 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
655 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
658 /* Test and compare insns in i386.md store the information needed to
659 generate branch and scc insns here. */
661 rtx ix86_compare_op0 = NULL_RTX;
662 rtx ix86_compare_op1 = NULL_RTX;
664 /* The encoding characters for the four TLS models present in ELF. */
666 static char const tls_model_chars[] = " GLil";
668 #define MAX_386_STACK_LOCALS 3
669 /* Size of the register save area. */
670 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
672 /* Define the structure for the machine field in struct function. */
673 struct machine_function GTY(())
675 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
676 const char *some_ld_name;
677 int save_varrargs_registers;
678 int accesses_prev_frame;
681 #define ix86_stack_locals (cfun->machine->stack_locals)
682 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
684 /* Structure describing stack frame layout.
685 Stack grows downward:
691 saved frame pointer if frame_pointer_needed
692 <- HARD_FRAME_POINTER
698 > to_allocate <- FRAME_POINTER
710 int outgoing_arguments_size;
713 HOST_WIDE_INT to_allocate;
714 /* The offsets relative to ARG_POINTER. */
715 HOST_WIDE_INT frame_pointer_offset;
716 HOST_WIDE_INT hard_frame_pointer_offset;
717 HOST_WIDE_INT stack_pointer_offset;
720 /* Used to enable/disable debugging features. */
721 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
722 /* Code model option as passed by user. */
723 const char *ix86_cmodel_string;
725 enum cmodel ix86_cmodel;
727 const char *ix86_asm_string;
728 enum asm_dialect ix86_asm_dialect = ASM_ATT;
730 const char *ix86_tls_dialect_string;
731 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
733 /* Which unit we are generating floating point math for. */
734 enum fpmath_unit ix86_fpmath;
736 /* Which cpu are we scheduling for. */
737 enum processor_type ix86_cpu;
738 /* Which instruction set architecture to use. */
739 enum processor_type ix86_arch;
741 /* Strings to hold which cpu and instruction set architecture to use. */
742 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
743 const char *ix86_arch_string; /* for -march=<xxx> */
744 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
746 /* # of registers to use to pass arguments. */
747 const char *ix86_regparm_string;
749 /* true if sse prefetch instruction is not NOOP. */
750 int x86_prefetch_sse;
752 /* ix86_regparm_string as a number */
755 /* Alignment to use for loops and jumps: */
757 /* Power of two alignment for loops. */
758 const char *ix86_align_loops_string;
760 /* Power of two alignment for non-loop jumps. */
761 const char *ix86_align_jumps_string;
763 /* Power of two alignment for stack boundary in bytes. */
764 const char *ix86_preferred_stack_boundary_string;
766 /* Preferred alignment for stack boundary in bits. */
767 int ix86_preferred_stack_boundary;
769 /* Values 1-5: see jump.c */
770 int ix86_branch_cost;
771 const char *ix86_branch_cost_string;
773 /* Power of two alignment for functions. */
774 const char *ix86_align_funcs_string;
776 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
777 static char internal_label_prefix[16];
778 static int internal_label_prefix_len;
780 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
781 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
782 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
783 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
785 static const char *get_some_local_dynamic_name PARAMS ((void));
786 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
787 static rtx maybe_get_pool_constant PARAMS ((rtx));
788 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
789 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
791 static rtx get_thread_pointer PARAMS ((void));
792 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
793 static rtx gen_push PARAMS ((rtx));
794 static int memory_address_length PARAMS ((rtx addr));
795 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
796 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
797 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
798 static void ix86_dump_ppro_packet PARAMS ((FILE *));
799 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
800 static struct machine_function * ix86_init_machine_status PARAMS ((void));
801 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
802 static int ix86_nsaved_regs PARAMS ((void));
803 static void ix86_emit_save_regs PARAMS ((void));
804 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
805 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
806 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
807 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
808 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
809 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
810 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
811 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
812 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
813 static int ix86_issue_rate PARAMS ((void));
814 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
815 static void ix86_sched_init PARAMS ((FILE *, int, int));
816 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
817 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
818 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
819 static int ia32_multipass_dfa_lookahead PARAMS ((void));
820 static void ix86_init_mmx_sse_builtins PARAMS ((void));
821 static rtx x86_this_parameter PARAMS ((tree));
822 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
823 HOST_WIDE_INT, tree));
824 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
825 HOST_WIDE_INT, tree));
826 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
830 rtx base, index, disp;
834 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
835 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
837 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
838 static const char *ix86_strip_name_encoding PARAMS ((const char *))
841 struct builtin_description;
842 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
844 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
846 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
847 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
848 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
849 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
850 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
851 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
852 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
856 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
858 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
859 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
860 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
861 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
862 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
863 static int ix86_save_reg PARAMS ((unsigned int, int));
864 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
865 static int ix86_comp_type_attributes PARAMS ((tree, tree));
866 static int ix86_fntype_regparm PARAMS ((tree));
867 const struct attribute_spec ix86_attribute_table[];
868 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
869 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
870 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
871 static int ix86_value_regno PARAMS ((enum machine_mode));
872 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
873 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
875 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
876 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
879 /* Register class used for passing given 64bit part of the argument.
880 These represent classes as documented by the PS ABI, with the exception
881 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
882 use SF or DFmode move instead of DImode to avoid reformatting penalties.
884 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
885 whenever possible (upper half does contain padding).
887 enum x86_64_reg_class
890 X86_64_INTEGER_CLASS,
891 X86_64_INTEGERSI_CLASS,
900 static const char * const x86_64_reg_class_name[] =
901 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
903 #define MAX_CLASSES 4
904 static int classify_argument PARAMS ((enum machine_mode, tree,
905 enum x86_64_reg_class [MAX_CLASSES],
907 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
909 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
911 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
912 enum x86_64_reg_class));
914 /* Initialize the GCC target structure. */
915 #undef TARGET_ATTRIBUTE_TABLE
916 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
917 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
918 # undef TARGET_MERGE_DECL_ATTRIBUTES
919 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
922 #undef TARGET_COMP_TYPE_ATTRIBUTES
923 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
925 #undef TARGET_INIT_BUILTINS
926 #define TARGET_INIT_BUILTINS ix86_init_builtins
928 #undef TARGET_EXPAND_BUILTIN
929 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
931 #undef TARGET_ASM_FUNCTION_EPILOGUE
932 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
934 #undef TARGET_ASM_OPEN_PAREN
935 #define TARGET_ASM_OPEN_PAREN ""
936 #undef TARGET_ASM_CLOSE_PAREN
937 #define TARGET_ASM_CLOSE_PAREN ""
939 #undef TARGET_ASM_ALIGNED_HI_OP
940 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
941 #undef TARGET_ASM_ALIGNED_SI_OP
942 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
944 #undef TARGET_ASM_ALIGNED_DI_OP
945 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
948 #undef TARGET_ASM_UNALIGNED_HI_OP
949 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
950 #undef TARGET_ASM_UNALIGNED_SI_OP
951 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
952 #undef TARGET_ASM_UNALIGNED_DI_OP
953 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
955 #undef TARGET_SCHED_ADJUST_COST
956 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
957 #undef TARGET_SCHED_ISSUE_RATE
958 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
959 #undef TARGET_SCHED_VARIABLE_ISSUE
960 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
961 #undef TARGET_SCHED_INIT
962 #define TARGET_SCHED_INIT ix86_sched_init
963 #undef TARGET_SCHED_REORDER
964 #define TARGET_SCHED_REORDER ix86_sched_reorder
965 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
966 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
967 ia32_use_dfa_pipeline_interface
968 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
969 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
970 ia32_multipass_dfa_lookahead
972 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
973 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
976 #undef TARGET_HAVE_TLS
977 #define TARGET_HAVE_TLS true
979 #undef TARGET_CANNOT_FORCE_CONST_MEM
980 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
982 #undef TARGET_MS_BITFIELD_LAYOUT_P
983 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
985 #undef TARGET_ASM_OUTPUT_MI_THUNK
986 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
987 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
988 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
990 struct gcc_target targetm = TARGET_INITIALIZER;
992 /* Sometimes certain combinations of command options do not make
993 sense on a particular target machine. You can define a macro
994 `OVERRIDE_OPTIONS' to take account of this. This macro, if
995 defined, is executed once just after all the command options have
998 Don't use this macro to turn on various extra optimizations for
999 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1005 /* Comes from final.c -- no real reason to change it. */
1006 #define MAX_CODE_ALIGN 16
1010 const struct processor_costs *cost; /* Processor costs */
1011 const int target_enable; /* Target flags to enable. */
1012 const int target_disable; /* Target flags to disable. */
1013 const int align_loop; /* Default alignments. */
1014 const int align_loop_max_skip;
1015 const int align_jump;
1016 const int align_jump_max_skip;
1017 const int align_func;
1019 const processor_target_table[PROCESSOR_max] =
1021 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1022 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1023 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1024 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1025 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1026 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1027 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1028 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1031 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1034 const char *const name; /* processor name or nickname. */
1035 const enum processor_type processor;
1036 const enum pta_flags
1041 PTA_PREFETCH_SSE = 8,
1047 const processor_alias_table[] =
1049 {"i386", PROCESSOR_I386, 0},
1050 {"i486", PROCESSOR_I486, 0},
1051 {"i586", PROCESSOR_PENTIUM, 0},
1052 {"pentium", PROCESSOR_PENTIUM, 0},
1053 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1054 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1055 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1056 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1057 {"i686", PROCESSOR_PENTIUMPRO, 0},
1058 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1059 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1060 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1061 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1062 PTA_MMX | PTA_PREFETCH_SSE},
1063 {"k6", PROCESSOR_K6, PTA_MMX},
1064 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1065 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1066 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1068 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1069 | PTA_3DNOW | PTA_3DNOW_A},
1070 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1071 | PTA_3DNOW_A | PTA_SSE},
1072 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1073 | PTA_3DNOW_A | PTA_SSE},
1074 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1075 | PTA_3DNOW_A | PTA_SSE},
1076 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1077 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1080 int const pta_size = ARRAY_SIZE (processor_alias_table);
1082 /* By default our XFmode is the 80-bit extended format. If we have
1083 use TFmode instead, it's also the 80-bit format, but with padding. */
1084 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1085 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1087 /* Set the default values for switches whose default depends on TARGET_64BIT
1088 in case they weren't overwritten by command line options. */
1091 if (flag_omit_frame_pointer == 2)
1092 flag_omit_frame_pointer = 1;
1093 if (flag_asynchronous_unwind_tables == 2)
1094 flag_asynchronous_unwind_tables = 1;
1095 if (flag_pcc_struct_return == 2)
1096 flag_pcc_struct_return = 0;
1100 if (flag_omit_frame_pointer == 2)
1101 flag_omit_frame_pointer = 0;
1102 if (flag_asynchronous_unwind_tables == 2)
1103 flag_asynchronous_unwind_tables = 0;
1104 if (flag_pcc_struct_return == 2)
1105 flag_pcc_struct_return = 1;
1108 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1109 SUBTARGET_OVERRIDE_OPTIONS;
1112 if (!ix86_cpu_string && ix86_arch_string)
1113 ix86_cpu_string = ix86_arch_string;
1114 if (!ix86_cpu_string)
1115 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1116 if (!ix86_arch_string)
1117 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1119 if (ix86_cmodel_string != 0)
1121 if (!strcmp (ix86_cmodel_string, "small"))
1122 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1124 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1125 else if (!strcmp (ix86_cmodel_string, "32"))
1126 ix86_cmodel = CM_32;
1127 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1128 ix86_cmodel = CM_KERNEL;
1129 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1130 ix86_cmodel = CM_MEDIUM;
1131 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1132 ix86_cmodel = CM_LARGE;
1134 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1138 ix86_cmodel = CM_32;
1140 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1142 if (ix86_asm_string != 0)
1144 if (!strcmp (ix86_asm_string, "intel"))
1145 ix86_asm_dialect = ASM_INTEL;
1146 else if (!strcmp (ix86_asm_string, "att"))
1147 ix86_asm_dialect = ASM_ATT;
1149 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1151 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1152 error ("code model `%s' not supported in the %s bit mode",
1153 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1154 if (ix86_cmodel == CM_LARGE)
1155 sorry ("code model `large' not supported yet");
1156 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1157 sorry ("%i-bit mode not compiled in",
1158 (target_flags & MASK_64BIT) ? 64 : 32);
1160 for (i = 0; i < pta_size; i++)
1161 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1163 ix86_arch = processor_alias_table[i].processor;
1164 /* Default cpu tuning to the architecture. */
1165 ix86_cpu = ix86_arch;
1166 if (processor_alias_table[i].flags & PTA_MMX
1167 && !(target_flags_explicit & MASK_MMX))
1168 target_flags |= MASK_MMX;
1169 if (processor_alias_table[i].flags & PTA_3DNOW
1170 && !(target_flags_explicit & MASK_3DNOW))
1171 target_flags |= MASK_3DNOW;
1172 if (processor_alias_table[i].flags & PTA_3DNOW_A
1173 && !(target_flags_explicit & MASK_3DNOW_A))
1174 target_flags |= MASK_3DNOW_A;
1175 if (processor_alias_table[i].flags & PTA_SSE
1176 && !(target_flags_explicit & MASK_SSE))
1177 target_flags |= MASK_SSE;
1178 if (processor_alias_table[i].flags & PTA_SSE2
1179 && !(target_flags_explicit & MASK_SSE2))
1180 target_flags |= MASK_SSE2;
1181 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1182 x86_prefetch_sse = true;
1183 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1184 error ("CPU you selected does not support x86-64 instruction set");
1189 error ("bad value (%s) for -march= switch", ix86_arch_string);
1191 for (i = 0; i < pta_size; i++)
1192 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1194 ix86_cpu = processor_alias_table[i].processor;
1195 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1196 error ("CPU you selected does not support x86-64 instruction set");
1199 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1200 x86_prefetch_sse = true;
1202 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1205 ix86_cost = &size_cost;
1207 ix86_cost = processor_target_table[ix86_cpu].cost;
1208 target_flags |= processor_target_table[ix86_cpu].target_enable;
1209 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1211 /* Arrange to set up i386_stack_locals for all functions. */
1212 init_machine_status = ix86_init_machine_status;
1214 /* Validate -mregparm= value. */
1215 if (ix86_regparm_string)
1217 i = atoi (ix86_regparm_string);
1218 if (i < 0 || i > REGPARM_MAX)
1219 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1225 ix86_regparm = REGPARM_MAX;
1227 /* If the user has provided any of the -malign-* options,
1228 warn and use that value only if -falign-* is not set.
1229 Remove this code in GCC 3.2 or later. */
1230 if (ix86_align_loops_string)
1232 warning ("-malign-loops is obsolete, use -falign-loops");
1233 if (align_loops == 0)
1235 i = atoi (ix86_align_loops_string);
1236 if (i < 0 || i > MAX_CODE_ALIGN)
1237 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1239 align_loops = 1 << i;
1243 if (ix86_align_jumps_string)
1245 warning ("-malign-jumps is obsolete, use -falign-jumps");
1246 if (align_jumps == 0)
1248 i = atoi (ix86_align_jumps_string);
1249 if (i < 0 || i > MAX_CODE_ALIGN)
1250 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1252 align_jumps = 1 << i;
1256 if (ix86_align_funcs_string)
1258 warning ("-malign-functions is obsolete, use -falign-functions");
1259 if (align_functions == 0)
1261 i = atoi (ix86_align_funcs_string);
1262 if (i < 0 || i > MAX_CODE_ALIGN)
1263 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1265 align_functions = 1 << i;
1269 /* Default align_* from the processor table. */
1270 if (align_loops == 0)
1272 align_loops = processor_target_table[ix86_cpu].align_loop;
1273 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1275 if (align_jumps == 0)
1277 align_jumps = processor_target_table[ix86_cpu].align_jump;
1278 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1280 if (align_functions == 0)
1282 align_functions = processor_target_table[ix86_cpu].align_func;
1285 /* Validate -mpreferred-stack-boundary= value, or provide default.
1286 The default of 128 bits is for Pentium III's SSE __m128, but we
1287 don't want additional code to keep the stack aligned when
1288 optimizing for code size. */
1289 ix86_preferred_stack_boundary = (optimize_size
1290 ? TARGET_64BIT ? 128 : 32
1292 if (ix86_preferred_stack_boundary_string)
1294 i = atoi (ix86_preferred_stack_boundary_string);
1295 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1296 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1297 TARGET_64BIT ? 4 : 2);
1299 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1302 /* Validate -mbranch-cost= value, or provide default. */
1303 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1304 if (ix86_branch_cost_string)
1306 i = atoi (ix86_branch_cost_string);
1308 error ("-mbranch-cost=%d is not between 0 and 5", i);
1310 ix86_branch_cost = i;
1313 if (ix86_tls_dialect_string)
1315 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1316 ix86_tls_dialect = TLS_DIALECT_GNU;
1317 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1318 ix86_tls_dialect = TLS_DIALECT_SUN;
1320 error ("bad value (%s) for -mtls-dialect= switch",
1321 ix86_tls_dialect_string);
1324 /* Keep nonleaf frame pointers. */
1325 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1326 flag_omit_frame_pointer = 1;
1328 /* If we're doing fast math, we don't care about comparison order
1329 wrt NaNs. This lets us use a shorter comparison sequence. */
1330 if (flag_unsafe_math_optimizations)
1331 target_flags &= ~MASK_IEEE_FP;
1333 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1334 since the insns won't need emulation. */
1335 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1336 target_flags &= ~MASK_NO_FANCY_MATH_387;
1340 if (TARGET_ALIGN_DOUBLE)
1341 error ("-malign-double makes no sense in the 64bit mode");
1343 error ("-mrtd calling convention not supported in the 64bit mode");
1344 /* Enable by default the SSE and MMX builtins. */
1345 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1346 ix86_fpmath = FPMATH_SSE;
1349 ix86_fpmath = FPMATH_387;
1351 if (ix86_fpmath_string != 0)
1353 if (! strcmp (ix86_fpmath_string, "387"))
1354 ix86_fpmath = FPMATH_387;
1355 else if (! strcmp (ix86_fpmath_string, "sse"))
1359 warning ("SSE instruction set disabled, using 387 arithmetics");
1360 ix86_fpmath = FPMATH_387;
1363 ix86_fpmath = FPMATH_SSE;
1365 else if (! strcmp (ix86_fpmath_string, "387,sse")
1366 || ! strcmp (ix86_fpmath_string, "sse,387"))
1370 warning ("SSE instruction set disabled, using 387 arithmetics");
1371 ix86_fpmath = FPMATH_387;
1373 else if (!TARGET_80387)
1375 warning ("387 instruction set disabled, using SSE arithmetics");
1376 ix86_fpmath = FPMATH_SSE;
1379 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1382 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1385 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1389 target_flags |= MASK_MMX;
1390 x86_prefetch_sse = true;
1393 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1396 target_flags |= MASK_MMX;
1397 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1398 extensions it adds. */
1399 if (x86_3dnow_a & (1 << ix86_arch))
1400 target_flags |= MASK_3DNOW_A;
1402 if ((x86_accumulate_outgoing_args & CPUMASK)
1403 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1405 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1407 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1410 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1411 p = strchr (internal_label_prefix, 'X');
1412 internal_label_prefix_len = p - internal_label_prefix;
1418 optimization_options (level, size)
1420 int size ATTRIBUTE_UNUSED;
1422 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1423 make the problem with not enough registers even worse. */
1424 #ifdef INSN_SCHEDULING
1426 flag_schedule_insns = 0;
1429 /* The default values of these switches depend on the TARGET_64BIT
1430 that is not known at this moment. Mark these values with 2 and
1431 let user the to override these. In case there is no command line option
1432 specifying them, we will set the defaults in override_options. */
1434 flag_omit_frame_pointer = 2;
1435 flag_pcc_struct_return = 2;
1436 flag_asynchronous_unwind_tables = 2;
1439 /* Table of valid machine attributes. */
1440 const struct attribute_spec ix86_attribute_table[] =
1442 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1443 /* Stdcall attribute says callee is responsible for popping arguments
1444 if they are not variable. */
1445 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1446 /* Fastcall attribute says callee is responsible for popping arguments
1447 if they are not variable. */
1448 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1449 /* Cdecl attribute says the callee is a normal C declaration */
1450 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1451 /* Regparm attribute specifies how many integer arguments are to be
1452 passed in registers. */
1453 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1454 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1455 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1456 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1457 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1459 { NULL, 0, 0, false, false, false, NULL }
1462 /* If PIC, we cannot make sibling calls to global functions
1463 because the PLT requires %ebx live.
1464 If we are returning floats on the register stack, we cannot make
1465 sibling calls to functions that return floats. (The stack adjust
1466 instruction will wind up after the sibcall jump, and not be executed.) */
1469 ix86_function_ok_for_sibcall (decl, exp)
1473 /* If we are generating position-independent code, we cannot sibcall
1474 optimize any indirect call, or a direct call to a global function,
1475 as the PLT requires %ebx be live. */
1476 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1479 /* If we are returning floats on the 80387 register stack, we cannot
1480 make a sibcall from a function that doesn't return a float to a
1481 function that does; the necessary stack adjustment will not be
1483 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1484 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1487 /* If this call is indirect, we'll need to be able to use a call-clobbered
1488 register for the address of the target function. Make sure that all
1489 such registers are not used for passing parameters. */
1490 if (!decl && !TARGET_64BIT)
1492 int regparm = ix86_regparm;
1495 /* We're looking at the CALL_EXPR, we need the type of the function. */
1496 type = TREE_OPERAND (exp, 0); /* pointer expression */
1497 type = TREE_TYPE (type); /* pointer type */
1498 type = TREE_TYPE (type); /* function type */
1500 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1502 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1506 /* ??? Need to count the actual number of registers to be used,
1507 not the possible number of registers. Fix later. */
1512 /* Otherwise okay. That also includes certain types of indirect calls. */
1516 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1517 arguments as in struct attribute_spec.handler. */
1519 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1522 tree args ATTRIBUTE_UNUSED;
1523 int flags ATTRIBUTE_UNUSED;
1526 if (TREE_CODE (*node) != FUNCTION_TYPE
1527 && TREE_CODE (*node) != METHOD_TYPE
1528 && TREE_CODE (*node) != FIELD_DECL
1529 && TREE_CODE (*node) != TYPE_DECL)
1531 warning ("`%s' attribute only applies to functions",
1532 IDENTIFIER_POINTER (name));
1533 *no_add_attrs = true;
1537 if (is_attribute_p ("fastcall", name))
1539 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1541 error ("fastcall and stdcall attributes are not compatible");
1543 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1545 error ("fastcall and regparm attributes are not compatible");
1548 else if (is_attribute_p ("stdcall", name))
1550 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1552 error ("fastcall and stdcall attributes are not compatible");
1559 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1560 *no_add_attrs = true;
1566 /* Handle a "regparm" attribute;
1567 arguments as in struct attribute_spec.handler. */
1569 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1573 int flags ATTRIBUTE_UNUSED;
1576 if (TREE_CODE (*node) != FUNCTION_TYPE
1577 && TREE_CODE (*node) != METHOD_TYPE
1578 && TREE_CODE (*node) != FIELD_DECL
1579 && TREE_CODE (*node) != TYPE_DECL)
1581 warning ("`%s' attribute only applies to functions",
1582 IDENTIFIER_POINTER (name));
1583 *no_add_attrs = true;
1589 cst = TREE_VALUE (args);
1590 if (TREE_CODE (cst) != INTEGER_CST)
1592 warning ("`%s' attribute requires an integer constant argument",
1593 IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
1596 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1598 warning ("argument to `%s' attribute larger than %d",
1599 IDENTIFIER_POINTER (name), REGPARM_MAX);
1600 *no_add_attrs = true;
1603 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1605 error ("fastcall and regparm attributes are not compatible");
1612 /* Return 0 if the attributes for two types are incompatible, 1 if they
1613 are compatible, and 2 if they are nearly compatible (which causes a
1614 warning to be generated). */
1617 ix86_comp_type_attributes (type1, type2)
1621 /* Check for mismatch of non-default calling convention. */
1622 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1624 if (TREE_CODE (type1) != FUNCTION_TYPE)
1627 /* Check for mismatched fastcall types */
1628 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1629 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1632 /* Check for mismatched return types (cdecl vs stdcall). */
1633 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1634 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1639 /* Return the regparm value for a fuctio with the indicated TYPE. */
1642 ix86_fntype_regparm (type)
1647 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1649 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1651 return ix86_regparm;
1654 /* Value is the number of bytes of arguments automatically
1655 popped when returning from a subroutine call.
1656 FUNDECL is the declaration node of the function (as a tree),
1657 FUNTYPE is the data type of the function (as a tree),
1658 or for a library call it is an identifier node for the subroutine name.
1659 SIZE is the number of bytes of arguments passed on the stack.
1661 On the 80386, the RTD insn may be used to pop them if the number
1662 of args is fixed, but if the number is variable then the caller
1663 must pop them all. RTD can't be used for library calls now
1664 because the library is compiled with the Unix compiler.
1665 Use of RTD is a selectable option, since it is incompatible with
1666 standard Unix calling sequences. If the option is not selected,
1667 the caller must always pop the args.
1669 The attribute stdcall is equivalent to RTD on a per module basis. */
1672 ix86_return_pops_args (fundecl, funtype, size)
1677 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1679 /* Cdecl functions override -mrtd, and never pop the stack. */
1680 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1682 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1683 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1684 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1688 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1689 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1690 == void_type_node)))
1694 /* Lose any fake structure return argument if it is passed on the stack. */
1695 if (aggregate_value_p (TREE_TYPE (funtype))
1698 int nregs = ix86_fntype_regparm (funtype);
1701 return GET_MODE_SIZE (Pmode);
1707 /* Argument support functions. */
1709 /* Return true when register may be used to pass function parameters. */
1711 ix86_function_arg_regno_p (regno)
1716 return (regno < REGPARM_MAX
1717 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1718 if (SSE_REGNO_P (regno) && TARGET_SSE)
1720 /* RAX is used as hidden argument to va_arg functions. */
1723 for (i = 0; i < REGPARM_MAX; i++)
1724 if (regno == x86_64_int_parameter_registers[i])
1729 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1730 for a call to a function whose data type is FNTYPE.
1731 For a library call, FNTYPE is 0. */
1734 init_cumulative_args (cum, fntype, libname)
1735 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1736 tree fntype; /* tree ptr for function decl */
1737 rtx libname; /* SYMBOL_REF of library name or 0 */
1739 static CUMULATIVE_ARGS zero_cum;
1740 tree param, next_param;
1742 if (TARGET_DEBUG_ARG)
1744 fprintf (stderr, "\ninit_cumulative_args (");
1746 fprintf (stderr, "fntype code = %s, ret code = %s",
1747 tree_code_name[(int) TREE_CODE (fntype)],
1748 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1750 fprintf (stderr, "no fntype");
1753 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1758 /* Set up the number of registers to use for passing arguments. */
1759 cum->nregs = ix86_regparm;
1760 cum->sse_nregs = SSE_REGPARM_MAX;
1761 if (fntype && !TARGET_64BIT)
1763 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1766 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1768 cum->maybe_vaarg = false;
1770 /* Use ecx and edx registers if function has fastcall attribute */
1771 if (fntype && !TARGET_64BIT)
1773 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1781 /* Determine if this function has variable arguments. This is
1782 indicated by the last argument being 'void_type_mode' if there
1783 are no variable arguments. If there are variable arguments, then
1784 we won't pass anything in registers */
1788 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1789 param != 0; param = next_param)
1791 next_param = TREE_CHAIN (param);
1792 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1799 cum->maybe_vaarg = true;
1803 if ((!fntype && !libname)
1804 || (fntype && !TYPE_ARG_TYPES (fntype)))
1805 cum->maybe_vaarg = 1;
1807 if (TARGET_DEBUG_ARG)
1808 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1813 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1814 of this code is to classify each 8bytes of incoming argument by the register
1815 class and assign registers accordingly. */
1817 /* Return the union class of CLASS1 and CLASS2.
1818 See the x86-64 PS ABI for details. */
1820 static enum x86_64_reg_class
1821 merge_classes (class1, class2)
1822 enum x86_64_reg_class class1, class2;
1824 /* Rule #1: If both classes are equal, this is the resulting class. */
1825 if (class1 == class2)
1828 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1830 if (class1 == X86_64_NO_CLASS)
1832 if (class2 == X86_64_NO_CLASS)
1835 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1836 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1837 return X86_64_MEMORY_CLASS;
1839 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1840 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1841 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1842 return X86_64_INTEGERSI_CLASS;
1843 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1844 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1845 return X86_64_INTEGER_CLASS;
1847 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1848 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1849 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1850 return X86_64_MEMORY_CLASS;
1852 /* Rule #6: Otherwise class SSE is used. */
1853 return X86_64_SSE_CLASS;
1856 /* Classify the argument of type TYPE and mode MODE.
1857 CLASSES will be filled by the register class used to pass each word
1858 of the operand. The number of words is returned. In case the parameter
1859 should be passed in memory, 0 is returned. As a special case for zero
1860 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1862 BIT_OFFSET is used internally for handling records and specifies offset
1863 of the offset in bits modulo 256 to avoid overflow cases.
1865 See the x86-64 PS ABI for details.
1869 classify_argument (mode, type, classes, bit_offset)
1870 enum machine_mode mode;
1872 enum x86_64_reg_class classes[MAX_CLASSES];
1876 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1877 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1879 /* Variable sized entities are always passed/returned in memory. */
1883 if (type && AGGREGATE_TYPE_P (type))
1887 enum x86_64_reg_class subclasses[MAX_CLASSES];
1889 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1893 for (i = 0; i < words; i++)
1894 classes[i] = X86_64_NO_CLASS;
1896 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1897 signalize memory class, so handle it as special case. */
1900 classes[0] = X86_64_NO_CLASS;
1904 /* Classify each field of record and merge classes. */
1905 if (TREE_CODE (type) == RECORD_TYPE)
1907 /* For classes first merge in the field of the subclasses. */
1908 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1910 tree bases = TYPE_BINFO_BASETYPES (type);
1911 int n_bases = TREE_VEC_LENGTH (bases);
1914 for (i = 0; i < n_bases; ++i)
1916 tree binfo = TREE_VEC_ELT (bases, i);
1918 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1919 tree type = BINFO_TYPE (binfo);
1921 num = classify_argument (TYPE_MODE (type),
1923 (offset + bit_offset) % 256);
1926 for (i = 0; i < num; i++)
1928 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1930 merge_classes (subclasses[i], classes[i + pos]);
1934 /* And now merge the fields of structure. */
1935 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1937 if (TREE_CODE (field) == FIELD_DECL)
1941 /* Bitfields are always classified as integer. Handle them
1942 early, since later code would consider them to be
1943 misaligned integers. */
1944 if (DECL_BIT_FIELD (field))
1946 for (i = int_bit_position (field) / 8 / 8;
1947 i < (int_bit_position (field)
1948 + tree_low_cst (DECL_SIZE (field), 0)
1951 merge_classes (X86_64_INTEGER_CLASS,
1956 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1957 TREE_TYPE (field), subclasses,
1958 (int_bit_position (field)
1959 + bit_offset) % 256);
1962 for (i = 0; i < num; i++)
1965 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1967 merge_classes (subclasses[i], classes[i + pos]);
1973 /* Arrays are handled as small records. */
1974 else if (TREE_CODE (type) == ARRAY_TYPE)
1977 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1978 TREE_TYPE (type), subclasses, bit_offset);
1982 /* The partial classes are now full classes. */
1983 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1984 subclasses[0] = X86_64_SSE_CLASS;
1985 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1986 subclasses[0] = X86_64_INTEGER_CLASS;
1988 for (i = 0; i < words; i++)
1989 classes[i] = subclasses[i % num];
1991 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1992 else if (TREE_CODE (type) == UNION_TYPE
1993 || TREE_CODE (type) == QUAL_UNION_TYPE)
1995 /* For classes first merge in the field of the subclasses. */
1996 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1998 tree bases = TYPE_BINFO_BASETYPES (type);
1999 int n_bases = TREE_VEC_LENGTH (bases);
2002 for (i = 0; i < n_bases; ++i)
2004 tree binfo = TREE_VEC_ELT (bases, i);
2006 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2007 tree type = BINFO_TYPE (binfo);
2009 num = classify_argument (TYPE_MODE (type),
2011 (offset + (bit_offset % 64)) % 256);
2014 for (i = 0; i < num; i++)
2016 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2018 merge_classes (subclasses[i], classes[i + pos]);
2022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2024 if (TREE_CODE (field) == FIELD_DECL)
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2028 TREE_TYPE (field), subclasses,
2032 for (i = 0; i < num; i++)
2033 classes[i] = merge_classes (subclasses[i], classes[i]);
2040 /* Final merger cleanup. */
2041 for (i = 0; i < words; i++)
2043 /* If one class is MEMORY, everything should be passed in
2045 if (classes[i] == X86_64_MEMORY_CLASS)
2048 /* The X86_64_SSEUP_CLASS should be always preceded by
2049 X86_64_SSE_CLASS. */
2050 if (classes[i] == X86_64_SSEUP_CLASS
2051 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2052 classes[i] = X86_64_SSE_CLASS;
2054 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2055 if (classes[i] == X86_64_X87UP_CLASS
2056 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2057 classes[i] = X86_64_SSE_CLASS;
2062 /* Compute alignment needed. We align all types to natural boundaries with
2063 exception of XFmode that is aligned to 64bits. */
2064 if (mode != VOIDmode && mode != BLKmode)
2066 int mode_alignment = GET_MODE_BITSIZE (mode);
2069 mode_alignment = 128;
2070 else if (mode == XCmode)
2071 mode_alignment = 256;
2072 /* Misaligned fields are always returned in memory. */
2073 if (bit_offset % mode_alignment)
2077 /* Classification of atomic types. */
2087 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2088 classes[0] = X86_64_INTEGERSI_CLASS;
2090 classes[0] = X86_64_INTEGER_CLASS;
2094 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2097 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2098 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2101 if (!(bit_offset % 64))
2102 classes[0] = X86_64_SSESF_CLASS;
2104 classes[0] = X86_64_SSE_CLASS;
2107 classes[0] = X86_64_SSEDF_CLASS;
2110 classes[0] = X86_64_X87_CLASS;
2111 classes[1] = X86_64_X87UP_CLASS;
2114 classes[0] = X86_64_X87_CLASS;
2115 classes[1] = X86_64_X87UP_CLASS;
2116 classes[2] = X86_64_X87_CLASS;
2117 classes[3] = X86_64_X87UP_CLASS;
2120 classes[0] = X86_64_SSEDF_CLASS;
2121 classes[1] = X86_64_SSEDF_CLASS;
2124 classes[0] = X86_64_SSE_CLASS;
2132 classes[0] = X86_64_SSE_CLASS;
2133 classes[1] = X86_64_SSEUP_CLASS;
2148 /* Examine the argument and return set number of register required in each
2149 class. Return 0 iff parameter should be passed in memory. */
2151 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2152 enum machine_mode mode;
2154 int *int_nregs, *sse_nregs;
2157 enum x86_64_reg_class class[MAX_CLASSES];
2158 int n = classify_argument (mode, type, class, 0);
2164 for (n--; n >= 0; n--)
2167 case X86_64_INTEGER_CLASS:
2168 case X86_64_INTEGERSI_CLASS:
2171 case X86_64_SSE_CLASS:
2172 case X86_64_SSESF_CLASS:
2173 case X86_64_SSEDF_CLASS:
2176 case X86_64_NO_CLASS:
2177 case X86_64_SSEUP_CLASS:
2179 case X86_64_X87_CLASS:
2180 case X86_64_X87UP_CLASS:
2184 case X86_64_MEMORY_CLASS:
2189 /* Construct container for the argument used by GCC interface. See
2190 FUNCTION_ARG for the detailed description. */
2192 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2193 enum machine_mode mode;
2196 int nintregs, nsseregs;
2200 enum machine_mode tmpmode;
2202 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2203 enum x86_64_reg_class class[MAX_CLASSES];
2207 int needed_sseregs, needed_intregs;
2208 rtx exp[MAX_CLASSES];
2211 n = classify_argument (mode, type, class, 0);
2212 if (TARGET_DEBUG_ARG)
2215 fprintf (stderr, "Memory class\n");
2218 fprintf (stderr, "Classes:");
2219 for (i = 0; i < n; i++)
2221 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2223 fprintf (stderr, "\n");
2228 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2230 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2233 /* First construct simple cases. Avoid SCmode, since we want to use
2234 single register to pass this type. */
2235 if (n == 1 && mode != SCmode)
2238 case X86_64_INTEGER_CLASS:
2239 case X86_64_INTEGERSI_CLASS:
2240 return gen_rtx_REG (mode, intreg[0]);
2241 case X86_64_SSE_CLASS:
2242 case X86_64_SSESF_CLASS:
2243 case X86_64_SSEDF_CLASS:
2244 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2245 case X86_64_X87_CLASS:
2246 return gen_rtx_REG (mode, FIRST_STACK_REG);
2247 case X86_64_NO_CLASS:
2248 /* Zero sized array, struct or class. */
2253 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2254 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2256 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2257 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2258 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2259 && class[1] == X86_64_INTEGER_CLASS
2260 && (mode == CDImode || mode == TImode)
2261 && intreg[0] + 1 == intreg[1])
2262 return gen_rtx_REG (mode, intreg[0]);
2264 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2265 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2266 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2268 /* Otherwise figure out the entries of the PARALLEL. */
2269 for (i = 0; i < n; i++)
2273 case X86_64_NO_CLASS:
2275 case X86_64_INTEGER_CLASS:
2276 case X86_64_INTEGERSI_CLASS:
2277 /* Merge TImodes on aligned occasions here too. */
2278 if (i * 8 + 8 > bytes)
2279 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2280 else if (class[i] == X86_64_INTEGERSI_CLASS)
2284 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2285 if (tmpmode == BLKmode)
2287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2288 gen_rtx_REG (tmpmode, *intreg),
2292 case X86_64_SSESF_CLASS:
2293 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2294 gen_rtx_REG (SFmode,
2295 SSE_REGNO (sse_regno)),
2299 case X86_64_SSEDF_CLASS:
2300 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2301 gen_rtx_REG (DFmode,
2302 SSE_REGNO (sse_regno)),
2306 case X86_64_SSE_CLASS:
2307 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2311 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2312 gen_rtx_REG (tmpmode,
2313 SSE_REGNO (sse_regno)),
2315 if (tmpmode == TImode)
2323 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2324 for (i = 0; i < nexps; i++)
2325 XVECEXP (ret, 0, i) = exp [i];
2329 /* Update the data in CUM to advance over an argument
2330 of mode MODE and data type TYPE.
2331 (TYPE is null for libcalls where that information may not be available.) */
2334 function_arg_advance (cum, mode, type, named)
2335 CUMULATIVE_ARGS *cum; /* current arg information */
2336 enum machine_mode mode; /* current arg mode */
2337 tree type; /* type of the argument or 0 if lib support */
2338 int named; /* whether or not the argument was named */
2341 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2342 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2344 if (TARGET_DEBUG_ARG)
2346 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2347 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2350 int int_nregs, sse_nregs;
2351 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2352 cum->words += words;
2353 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2355 cum->nregs -= int_nregs;
2356 cum->sse_nregs -= sse_nregs;
2357 cum->regno += int_nregs;
2358 cum->sse_regno += sse_nregs;
2361 cum->words += words;
2365 if (TARGET_SSE && mode == TImode)
2367 cum->sse_words += words;
2368 cum->sse_nregs -= 1;
2369 cum->sse_regno += 1;
2370 if (cum->sse_nregs <= 0)
2378 cum->words += words;
2379 cum->nregs -= words;
2380 cum->regno += words;
2382 if (cum->nregs <= 0)
2392 /* Define where to put the arguments to a function.
2393 Value is zero to push the argument on the stack,
2394 or a hard register in which to store the argument.
2396 MODE is the argument's machine mode.
2397 TYPE is the data type of the argument (as a tree).
2398 This is null for libcalls where that information may
2400 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2401 the preceding args and about the function being called.
2402 NAMED is nonzero if this argument is a named parameter
2403 (otherwise it is an extra parameter matching an ellipsis). */
2406 function_arg (cum, mode, type, named)
2407 CUMULATIVE_ARGS *cum; /* current arg information */
2408 enum machine_mode mode; /* current arg mode */
2409 tree type; /* type of the argument or 0 if lib support */
2410 int named; /* != 0 for normal args, == 0 for ... args */
2414 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2415 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2417 /* Handle a hidden AL argument containing number of registers for varargs
2418 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2420 if (mode == VOIDmode)
2423 return GEN_INT (cum->maybe_vaarg
2424 ? (cum->sse_nregs < 0
2432 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2433 &x86_64_int_parameter_registers [cum->regno],
2438 /* For now, pass fp/complex values on the stack. */
2447 if (words <= cum->nregs)
2449 int regno = cum->regno;
2451 /* Fastcall allocates the first two DWORD (SImode) or
2452 smaller arguments to ECX and EDX. */
2455 if (mode == BLKmode || mode == DImode)
2458 /* ECX not EAX is the first allocated register. */
2462 ret = gen_rtx_REG (mode, regno);
2467 ret = gen_rtx_REG (mode, cum->sse_regno);
2471 if (TARGET_DEBUG_ARG)
2474 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2475 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2478 print_simple_rtl (stderr, ret);
2480 fprintf (stderr, ", stack");
2482 fprintf (stderr, " )\n");
2488 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2492 ix86_function_arg_boundary (mode, type)
2493 enum machine_mode mode;
2498 return PARM_BOUNDARY;
2500 align = TYPE_ALIGN (type);
2502 align = GET_MODE_ALIGNMENT (mode);
2503 if (align < PARM_BOUNDARY)
2504 align = PARM_BOUNDARY;
2510 /* Return true if N is a possible register number of function value. */
2512 ix86_function_value_regno_p (regno)
2517 return ((regno) == 0
2518 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2519 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2521 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2522 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2523 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2526 /* Define how to find the value returned by a function.
2527 VALTYPE is the data type of the value (as a tree).
2528 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2529 otherwise, FUNC is 0. */
2531 ix86_function_value (valtype)
2536 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2537 REGPARM_MAX, SSE_REGPARM_MAX,
2538 x86_64_int_return_registers, 0);
2539 /* For zero sized structures, construct_container return NULL, but we need
2540 to keep rest of compiler happy by returning meaningful value. */
2542 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2546 return gen_rtx_REG (TYPE_MODE (valtype),
2547 ix86_value_regno (TYPE_MODE (valtype)));
2550 /* Return false iff type is returned in memory. */
2552 ix86_return_in_memory (type)
2555 int needed_intregs, needed_sseregs;
2558 return !examine_argument (TYPE_MODE (type), type, 1,
2559 &needed_intregs, &needed_sseregs);
2563 if (TYPE_MODE (type) == BLKmode
2564 || (VECTOR_MODE_P (TYPE_MODE (type))
2565 && int_size_in_bytes (type) == 8)
2566 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2567 && TYPE_MODE (type) != TFmode
2568 && !VECTOR_MODE_P (TYPE_MODE (type))))
2574 /* Define how to find the value returned by a library function
2575 assuming the value has mode MODE. */
2577 ix86_libcall_value (mode)
2578 enum machine_mode mode;
2588 return gen_rtx_REG (mode, FIRST_SSE_REG);
2591 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2593 return gen_rtx_REG (mode, 0);
2597 return gen_rtx_REG (mode, ix86_value_regno (mode));
2600 /* Given a mode, return the register to use for a return value. */
2603 ix86_value_regno (mode)
2604 enum machine_mode mode;
2606 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2607 return FIRST_FLOAT_REG;
2608 if (mode == TImode || VECTOR_MODE_P (mode))
2609 return FIRST_SSE_REG;
2613 /* Create the va_list data type. */
2616 ix86_build_va_list ()
2618 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2620 /* For i386 we use plain pointer to argument area. */
2622 return build_pointer_type (char_type_node);
2624 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2625 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2627 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2628 unsigned_type_node);
2629 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2630 unsigned_type_node);
2631 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2633 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2636 DECL_FIELD_CONTEXT (f_gpr) = record;
2637 DECL_FIELD_CONTEXT (f_fpr) = record;
2638 DECL_FIELD_CONTEXT (f_ovf) = record;
2639 DECL_FIELD_CONTEXT (f_sav) = record;
2641 TREE_CHAIN (record) = type_decl;
2642 TYPE_NAME (record) = type_decl;
2643 TYPE_FIELDS (record) = f_gpr;
2644 TREE_CHAIN (f_gpr) = f_fpr;
2645 TREE_CHAIN (f_fpr) = f_ovf;
2646 TREE_CHAIN (f_ovf) = f_sav;
2648 layout_type (record);
2650 /* The correct type is an array type of one element. */
2651 return build_array_type (record, build_index_type (size_zero_node));
2654 /* Perform any needed actions needed for a function that is receiving a
2655 variable number of arguments.
2659 MODE and TYPE are the mode and type of the current parameter.
2661 PRETEND_SIZE is a variable that should be set to the amount of stack
2662 that must be pushed by the prolog to pretend that our caller pushed
2665 Normally, this macro will push all remaining incoming registers on the
2666 stack and set PRETEND_SIZE to the length of the registers pushed. */
2669 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2670 CUMULATIVE_ARGS *cum;
2671 enum machine_mode mode;
2673 int *pretend_size ATTRIBUTE_UNUSED;
2677 CUMULATIVE_ARGS next_cum;
2678 rtx save_area = NULL_RTX, mem;
2691 /* Indicate to allocate space on the stack for varargs save area. */
2692 ix86_save_varrargs_registers = 1;
2694 fntype = TREE_TYPE (current_function_decl);
2695 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2696 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2697 != void_type_node));
2699 /* For varargs, we do not want to skip the dummy va_dcl argument.
2700 For stdargs, we do want to skip the last named argument. */
2703 function_arg_advance (&next_cum, mode, type, 1);
2706 save_area = frame_pointer_rtx;
2708 set = get_varargs_alias_set ();
2710 for (i = next_cum.regno; i < ix86_regparm; i++)
2712 mem = gen_rtx_MEM (Pmode,
2713 plus_constant (save_area, i * UNITS_PER_WORD));
2714 set_mem_alias_set (mem, set);
2715 emit_move_insn (mem, gen_rtx_REG (Pmode,
2716 x86_64_int_parameter_registers[i]));
2719 if (next_cum.sse_nregs)
2721 /* Now emit code to save SSE registers. The AX parameter contains number
2722 of SSE parameter registers used to call this function. We use
2723 sse_prologue_save insn template that produces computed jump across
2724 SSE saves. We need some preparation work to get this working. */
2726 label = gen_label_rtx ();
2727 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2729 /* Compute address to jump to :
2730 label - 5*eax + nnamed_sse_arguments*5 */
2731 tmp_reg = gen_reg_rtx (Pmode);
2732 nsse_reg = gen_reg_rtx (Pmode);
2733 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2734 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2735 gen_rtx_MULT (Pmode, nsse_reg,
2737 if (next_cum.sse_regno)
2740 gen_rtx_CONST (DImode,
2741 gen_rtx_PLUS (DImode,
2743 GEN_INT (next_cum.sse_regno * 4))));
2745 emit_move_insn (nsse_reg, label_ref);
2746 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2748 /* Compute address of memory block we save into. We always use pointer
2749 pointing 127 bytes after first byte to store - this is needed to keep
2750 instruction size limited by 4 bytes. */
2751 tmp_reg = gen_reg_rtx (Pmode);
2752 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2753 plus_constant (save_area,
2754 8 * REGPARM_MAX + 127)));
2755 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2756 set_mem_alias_set (mem, set);
2757 set_mem_align (mem, BITS_PER_WORD);
2759 /* And finally do the dirty job! */
2760 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2761 GEN_INT (next_cum.sse_regno), label));
2766 /* Implement va_start. */
2769 ix86_va_start (valist, nextarg)
2773 HOST_WIDE_INT words, n_gpr, n_fpr;
2774 tree f_gpr, f_fpr, f_ovf, f_sav;
2775 tree gpr, fpr, ovf, sav, t;
2777 /* Only 64bit target needs something special. */
2780 std_expand_builtin_va_start (valist, nextarg);
2784 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2785 f_fpr = TREE_CHAIN (f_gpr);
2786 f_ovf = TREE_CHAIN (f_fpr);
2787 f_sav = TREE_CHAIN (f_ovf);
2789 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2790 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2791 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2792 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2793 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2795 /* Count number of gp and fp argument registers used. */
2796 words = current_function_args_info.words;
2797 n_gpr = current_function_args_info.regno;
2798 n_fpr = current_function_args_info.sse_regno;
2800 if (TARGET_DEBUG_ARG)
2801 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2802 (int) words, (int) n_gpr, (int) n_fpr);
2804 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2805 build_int_2 (n_gpr * 8, 0));
2806 TREE_SIDE_EFFECTS (t) = 1;
2807 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2809 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2810 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2811 TREE_SIDE_EFFECTS (t) = 1;
2812 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2814 /* Find the overflow area. */
2815 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2817 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2818 build_int_2 (words * UNITS_PER_WORD, 0));
2819 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2820 TREE_SIDE_EFFECTS (t) = 1;
2821 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2823 /* Find the register save area.
2824 Prologue of the function save it right above stack frame. */
2825 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2826 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2827 TREE_SIDE_EFFECTS (t) = 1;
2828 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2831 /* Implement va_arg. */
2833 ix86_va_arg (valist, type)
2836 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2837 tree f_gpr, f_fpr, f_ovf, f_sav;
2838 tree gpr, fpr, ovf, sav, t;
2840 rtx lab_false, lab_over = NULL_RTX;
2844 /* Only 64bit target needs something special. */
2847 return std_expand_builtin_va_arg (valist, type);
2850 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2851 f_fpr = TREE_CHAIN (f_gpr);
2852 f_ovf = TREE_CHAIN (f_fpr);
2853 f_sav = TREE_CHAIN (f_ovf);
2855 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2856 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2857 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2858 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2859 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2861 size = int_size_in_bytes (type);
2862 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2864 container = construct_container (TYPE_MODE (type), type, 0,
2865 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2867 * Pull the value out of the saved registers ...
2870 addr_rtx = gen_reg_rtx (Pmode);
2874 rtx int_addr_rtx, sse_addr_rtx;
2875 int needed_intregs, needed_sseregs;
2878 lab_over = gen_label_rtx ();
2879 lab_false = gen_label_rtx ();
2881 examine_argument (TYPE_MODE (type), type, 0,
2882 &needed_intregs, &needed_sseregs);
2885 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2886 || TYPE_ALIGN (type) > 128);
2888 /* In case we are passing structure, verify that it is consecutive block
2889 on the register save area. If not we need to do moves. */
2890 if (!need_temp && !REG_P (container))
2892 /* Verify that all registers are strictly consecutive */
2893 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2897 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2899 rtx slot = XVECEXP (container, 0, i);
2900 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2901 || INTVAL (XEXP (slot, 1)) != i * 16)
2909 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2911 rtx slot = XVECEXP (container, 0, i);
2912 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2913 || INTVAL (XEXP (slot, 1)) != i * 8)
2920 int_addr_rtx = addr_rtx;
2921 sse_addr_rtx = addr_rtx;
2925 int_addr_rtx = gen_reg_rtx (Pmode);
2926 sse_addr_rtx = gen_reg_rtx (Pmode);
2928 /* First ensure that we fit completely in registers. */
2931 emit_cmp_and_jump_insns (expand_expr
2932 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2933 GEN_INT ((REGPARM_MAX - needed_intregs +
2934 1) * 8), GE, const1_rtx, SImode,
2939 emit_cmp_and_jump_insns (expand_expr
2940 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2941 GEN_INT ((SSE_REGPARM_MAX -
2942 needed_sseregs + 1) * 16 +
2943 REGPARM_MAX * 8), GE, const1_rtx,
2944 SImode, 1, lab_false);
2947 /* Compute index to start of area used for integer regs. */
2950 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2951 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2952 if (r != int_addr_rtx)
2953 emit_move_insn (int_addr_rtx, r);
2957 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2958 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2959 if (r != sse_addr_rtx)
2960 emit_move_insn (sse_addr_rtx, r);
2967 /* Never use the memory itself, as it has the alias set. */
2968 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2969 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2970 set_mem_alias_set (mem, get_varargs_alias_set ());
2971 set_mem_align (mem, BITS_PER_UNIT);
2973 for (i = 0; i < XVECLEN (container, 0); i++)
2975 rtx slot = XVECEXP (container, 0, i);
2976 rtx reg = XEXP (slot, 0);
2977 enum machine_mode mode = GET_MODE (reg);
2983 if (SSE_REGNO_P (REGNO (reg)))
2985 src_addr = sse_addr_rtx;
2986 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2990 src_addr = int_addr_rtx;
2991 src_offset = REGNO (reg) * 8;
2993 src_mem = gen_rtx_MEM (mode, src_addr);
2994 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2995 src_mem = adjust_address (src_mem, mode, src_offset);
2996 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2997 emit_move_insn (dest_mem, src_mem);
3004 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3005 build_int_2 (needed_intregs * 8, 0));
3006 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3007 TREE_SIDE_EFFECTS (t) = 1;
3008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3013 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3014 build_int_2 (needed_sseregs * 16, 0));
3015 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3016 TREE_SIDE_EFFECTS (t) = 1;
3017 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3020 emit_jump_insn (gen_jump (lab_over));
3022 emit_label (lab_false);
3025 /* ... otherwise out of the overflow area. */
3027 /* Care for on-stack alignment if needed. */
3028 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3032 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3033 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3034 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3038 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3040 emit_move_insn (addr_rtx, r);
3043 build (PLUS_EXPR, TREE_TYPE (t), t,
3044 build_int_2 (rsize * UNITS_PER_WORD, 0));
3045 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3046 TREE_SIDE_EFFECTS (t) = 1;
3047 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3050 emit_label (lab_over);
3055 /* Return nonzero if OP is either a i387 or SSE fp register. */
3057 any_fp_register_operand (op, mode)
3059 enum machine_mode mode ATTRIBUTE_UNUSED;
3061 return ANY_FP_REG_P (op);
3064 /* Return nonzero if OP is an i387 fp register. */
3066 fp_register_operand (op, mode)
3068 enum machine_mode mode ATTRIBUTE_UNUSED;
3070 return FP_REG_P (op);
3073 /* Return nonzero if OP is a non-fp register_operand. */
3075 register_and_not_any_fp_reg_operand (op, mode)
3077 enum machine_mode mode;
3079 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3082 /* Return nonzero if OP is a register operand other than an
3083 i387 fp register. */
3085 register_and_not_fp_reg_operand (op, mode)
3087 enum machine_mode mode;
3089 return register_operand (op, mode) && !FP_REG_P (op);
3092 /* Return nonzero if OP is general operand representable on x86_64. */
3095 x86_64_general_operand (op, mode)
3097 enum machine_mode mode;
3100 return general_operand (op, mode);
3101 if (nonimmediate_operand (op, mode))
3103 return x86_64_sign_extended_value (op);
3106 /* Return nonzero if OP is general operand representable on x86_64
3107 as either sign extended or zero extended constant. */
3110 x86_64_szext_general_operand (op, mode)
3112 enum machine_mode mode;
3115 return general_operand (op, mode);
3116 if (nonimmediate_operand (op, mode))
3118 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3121 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3124 x86_64_nonmemory_operand (op, mode)
3126 enum machine_mode mode;
3129 return nonmemory_operand (op, mode);
3130 if (register_operand (op, mode))
3132 return x86_64_sign_extended_value (op);
3135 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3138 x86_64_movabs_operand (op, mode)
3140 enum machine_mode mode;
3142 if (!TARGET_64BIT || !flag_pic)
3143 return nonmemory_operand (op, mode);
3144 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3146 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3151 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3154 x86_64_szext_nonmemory_operand (op, mode)
3156 enum machine_mode mode;
3159 return nonmemory_operand (op, mode);
3160 if (register_operand (op, mode))
3162 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3165 /* Return nonzero if OP is immediate operand representable on x86_64. */
3168 x86_64_immediate_operand (op, mode)
3170 enum machine_mode mode;
3173 return immediate_operand (op, mode);
3174 return x86_64_sign_extended_value (op);
3177 /* Return nonzero if OP is immediate operand representable on x86_64. */
3180 x86_64_zext_immediate_operand (op, mode)
3182 enum machine_mode mode ATTRIBUTE_UNUSED;
3184 return x86_64_zero_extended_value (op);
3187 /* Return nonzero if OP is (const_int 1), else return zero. */
3190 const_int_1_operand (op, mode)
3192 enum machine_mode mode ATTRIBUTE_UNUSED;
3194 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3197 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3198 for shift & compare patterns, as shifting by 0 does not change flags),
3199 else return zero. */
3202 const_int_1_31_operand (op, mode)
3204 enum machine_mode mode ATTRIBUTE_UNUSED;
3206 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3209 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3210 reference and a constant. */
3213 symbolic_operand (op, mode)
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
3217 switch (GET_CODE (op))
3225 if (GET_CODE (op) == SYMBOL_REF
3226 || GET_CODE (op) == LABEL_REF
3227 || (GET_CODE (op) == UNSPEC
3228 && (XINT (op, 1) == UNSPEC_GOT
3229 || XINT (op, 1) == UNSPEC_GOTOFF
3230 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3232 if (GET_CODE (op) != PLUS
3233 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3237 if (GET_CODE (op) == SYMBOL_REF
3238 || GET_CODE (op) == LABEL_REF)
3240 /* Only @GOTOFF gets offsets. */
3241 if (GET_CODE (op) != UNSPEC
3242 || XINT (op, 1) != UNSPEC_GOTOFF)
3245 op = XVECEXP (op, 0, 0);
3246 if (GET_CODE (op) == SYMBOL_REF
3247 || GET_CODE (op) == LABEL_REF)
3256 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3259 pic_symbolic_operand (op, mode)
3261 enum machine_mode mode ATTRIBUTE_UNUSED;
3263 if (GET_CODE (op) != CONST)
3268 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3273 if (GET_CODE (op) == UNSPEC)
3275 if (GET_CODE (op) != PLUS
3276 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3279 if (GET_CODE (op) == UNSPEC)
3285 /* Return true if OP is a symbolic operand that resolves locally. */
3288 local_symbolic_operand (op, mode)
3290 enum machine_mode mode ATTRIBUTE_UNUSED;
3292 if (GET_CODE (op) == CONST
3293 && GET_CODE (XEXP (op, 0)) == PLUS
3294 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3295 op = XEXP (XEXP (op, 0), 0);
3297 if (GET_CODE (op) == LABEL_REF)
3300 if (GET_CODE (op) != SYMBOL_REF)
3303 /* These we've been told are local by varasm and encode_section_info
3305 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3308 /* There is, however, a not insubstantial body of code in the rest of
3309 the compiler that assumes it can just stick the results of
3310 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3311 /* ??? This is a hack. Should update the body of the compiler to
3312 always create a DECL an invoke targetm.encode_section_info. */
3313 if (strncmp (XSTR (op, 0), internal_label_prefix,
3314 internal_label_prefix_len) == 0)
3320 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3323 tls_symbolic_operand (op, mode)
3325 enum machine_mode mode ATTRIBUTE_UNUSED;
3327 const char *symbol_str;
3329 if (GET_CODE (op) != SYMBOL_REF)
3331 symbol_str = XSTR (op, 0);
3333 if (symbol_str[0] != '%')
3335 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3339 tls_symbolic_operand_1 (op, kind)
3341 enum tls_model kind;
3343 const char *symbol_str;
3345 if (GET_CODE (op) != SYMBOL_REF)
3347 symbol_str = XSTR (op, 0);
3349 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3353 global_dynamic_symbolic_operand (op, mode)
3355 enum machine_mode mode ATTRIBUTE_UNUSED;
3357 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3361 local_dynamic_symbolic_operand (op, mode)
3363 enum machine_mode mode ATTRIBUTE_UNUSED;
3365 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3369 initial_exec_symbolic_operand (op, mode)
3371 enum machine_mode mode ATTRIBUTE_UNUSED;
3373 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3377 local_exec_symbolic_operand (op, mode)
3379 enum machine_mode mode ATTRIBUTE_UNUSED;
3381 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3384 /* Test for a valid operand for a call instruction. Don't allow the
3385 arg pointer register or virtual regs since they may decay into
3386 reg + const, which the patterns can't handle. */
3389 call_insn_operand (op, mode)
3391 enum machine_mode mode ATTRIBUTE_UNUSED;
3393 /* Disallow indirect through a virtual register. This leads to
3394 compiler aborts when trying to eliminate them. */
3395 if (GET_CODE (op) == REG
3396 && (op == arg_pointer_rtx
3397 || op == frame_pointer_rtx
3398 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3399 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3402 /* Disallow `call 1234'. Due to varying assembler lameness this
3403 gets either rejected or translated to `call .+1234'. */
3404 if (GET_CODE (op) == CONST_INT)
3407 /* Explicitly allow SYMBOL_REF even if pic. */
3408 if (GET_CODE (op) == SYMBOL_REF)
3411 /* Otherwise we can allow any general_operand in the address. */
3412 return general_operand (op, Pmode);
3415 /* Test for a valid operand for a call instruction. Don't allow the
3416 arg pointer register or virtual regs since they may decay into
3417 reg + const, which the patterns can't handle. */
3420 sibcall_insn_operand (op, mode)
3422 enum machine_mode mode ATTRIBUTE_UNUSED;
3424 /* Disallow indirect through a virtual register. This leads to
3425 compiler aborts when trying to eliminate them. */
3426 if (GET_CODE (op) == REG
3427 && (op == arg_pointer_rtx
3428 || op == frame_pointer_rtx
3429 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3430 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3433 /* Explicitly allow SYMBOL_REF even if pic. */
3434 if (GET_CODE (op) == SYMBOL_REF)
3437 /* Otherwise we can only allow register operands. */
3438 return register_operand (op, Pmode);
3442 constant_call_address_operand (op, mode)
3444 enum machine_mode mode ATTRIBUTE_UNUSED;
3446 if (GET_CODE (op) == CONST
3447 && GET_CODE (XEXP (op, 0)) == PLUS
3448 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3449 op = XEXP (XEXP (op, 0), 0);
3450 return GET_CODE (op) == SYMBOL_REF;
3453 /* Match exactly zero and one. */
3456 const0_operand (op, mode)
3458 enum machine_mode mode;
3460 return op == CONST0_RTX (mode);
3464 const1_operand (op, mode)
3466 enum machine_mode mode ATTRIBUTE_UNUSED;
3468 return op == const1_rtx;
3471 /* Match 2, 4, or 8. Used for leal multiplicands. */
3474 const248_operand (op, mode)
3476 enum machine_mode mode ATTRIBUTE_UNUSED;
3478 return (GET_CODE (op) == CONST_INT
3479 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3482 /* True if this is a constant appropriate for an increment or decrement. */
3485 incdec_operand (op, mode)
3487 enum machine_mode mode ATTRIBUTE_UNUSED;
3489 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3490 registers, since carry flag is not set. */
3491 if (TARGET_PENTIUM4 && !optimize_size)
3493 return op == const1_rtx || op == constm1_rtx;
3496 /* Return nonzero if OP is acceptable as operand of DImode shift
3500 shiftdi_operand (op, mode)
3502 enum machine_mode mode ATTRIBUTE_UNUSED;
3505 return nonimmediate_operand (op, mode);
3507 return register_operand (op, mode);
3510 /* Return false if this is the stack pointer, or any other fake
3511 register eliminable to the stack pointer. Otherwise, this is
3514 This is used to prevent esp from being used as an index reg.
3515 Which would only happen in pathological cases. */
3518 reg_no_sp_operand (op, mode)
3520 enum machine_mode mode;
3523 if (GET_CODE (t) == SUBREG)
3525 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3528 return register_operand (op, mode);
3532 mmx_reg_operand (op, mode)
3534 enum machine_mode mode ATTRIBUTE_UNUSED;
3536 return MMX_REG_P (op);
3539 /* Return false if this is any eliminable register. Otherwise
3543 general_no_elim_operand (op, mode)
3545 enum machine_mode mode;
3548 if (GET_CODE (t) == SUBREG)
3550 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3551 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3552 || t == virtual_stack_dynamic_rtx)
3555 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3556 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3559 return general_operand (op, mode);
3562 /* Return false if this is any eliminable register. Otherwise
3563 register_operand or const_int. */
3566 nonmemory_no_elim_operand (op, mode)
3568 enum machine_mode mode;
3571 if (GET_CODE (t) == SUBREG)
3573 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3574 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3575 || t == virtual_stack_dynamic_rtx)
3578 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3581 /* Return false if this is any eliminable register or stack register,
3582 otherwise work like register_operand. */
3585 index_register_operand (op, mode)
3587 enum machine_mode mode;
3590 if (GET_CODE (t) == SUBREG)
3594 if (t == arg_pointer_rtx
3595 || t == frame_pointer_rtx
3596 || t == virtual_incoming_args_rtx
3597 || t == virtual_stack_vars_rtx
3598 || t == virtual_stack_dynamic_rtx
3599 || REGNO (t) == STACK_POINTER_REGNUM)
3602 return general_operand (op, mode);
3605 /* Return true if op is a Q_REGS class register. */
3608 q_regs_operand (op, mode)
3610 enum machine_mode mode;
3612 if (mode != VOIDmode && GET_MODE (op) != mode)
3614 if (GET_CODE (op) == SUBREG)
3615 op = SUBREG_REG (op);
3616 return ANY_QI_REG_P (op);
3619 /* Return true if op is an flags register. */
3622 flags_reg_operand (op, mode)
3624 enum machine_mode mode;
3626 if (mode != VOIDmode && GET_MODE (op) != mode)
3628 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3631 /* Return true if op is a NON_Q_REGS class register. */
3634 non_q_regs_operand (op, mode)
3636 enum machine_mode mode;
3638 if (mode != VOIDmode && GET_MODE (op) != mode)
3640 if (GET_CODE (op) == SUBREG)
3641 op = SUBREG_REG (op);
3642 return NON_QI_REG_P (op);
3646 zero_extended_scalar_load_operand (op, mode)
3648 enum machine_mode mode ATTRIBUTE_UNUSED;
3651 if (GET_CODE (op) != MEM)
3653 op = maybe_get_pool_constant (op);
3656 if (GET_CODE (op) != CONST_VECTOR)
3659 (GET_MODE_SIZE (GET_MODE (op)) /
3660 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3661 for (n_elts--; n_elts > 0; n_elts--)
3663 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3664 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3670 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3673 sse_comparison_operator (op, mode)
3675 enum machine_mode mode ATTRIBUTE_UNUSED;
3677 enum rtx_code code = GET_CODE (op);
3680 /* Operations supported directly. */
3690 /* These are equivalent to ones above in non-IEEE comparisons. */
3697 return !TARGET_IEEE_FP;
3702 /* Return 1 if OP is a valid comparison operator in valid mode. */
3704 ix86_comparison_operator (op, mode)
3706 enum machine_mode mode;
3708 enum machine_mode inmode;
3709 enum rtx_code code = GET_CODE (op);
3710 if (mode != VOIDmode && GET_MODE (op) != mode)
3712 if (GET_RTX_CLASS (code) != '<')
3714 inmode = GET_MODE (XEXP (op, 0));
3716 if (inmode == CCFPmode || inmode == CCFPUmode)
3718 enum rtx_code second_code, bypass_code;
3719 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3720 return (bypass_code == NIL && second_code == NIL);
3727 if (inmode == CCmode || inmode == CCGCmode
3728 || inmode == CCGOCmode || inmode == CCNOmode)
3731 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3732 if (inmode == CCmode)
3736 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3744 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3747 fcmov_comparison_operator (op, mode)
3749 enum machine_mode mode;
3751 enum machine_mode inmode;
3752 enum rtx_code code = GET_CODE (op);
3753 if (mode != VOIDmode && GET_MODE (op) != mode)
3755 if (GET_RTX_CLASS (code) != '<')
3757 inmode = GET_MODE (XEXP (op, 0));
3758 if (inmode == CCFPmode || inmode == CCFPUmode)
3760 enum rtx_code second_code, bypass_code;
3761 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3762 if (bypass_code != NIL || second_code != NIL)
3764 code = ix86_fp_compare_code_to_integer (code);
3766 /* i387 supports just limited amount of conditional codes. */
3769 case LTU: case GTU: case LEU: case GEU:
3770 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3773 case ORDERED: case UNORDERED:
3781 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3784 promotable_binary_operator (op, mode)
3786 enum machine_mode mode ATTRIBUTE_UNUSED;
3788 switch (GET_CODE (op))
3791 /* Modern CPUs have same latency for HImode and SImode multiply,
3792 but 386 and 486 do HImode multiply faster. */
3793 return ix86_cpu > PROCESSOR_I486;
3805 /* Nearly general operand, but accept any const_double, since we wish
3806 to be able to drop them into memory rather than have them get pulled
3810 cmp_fp_expander_operand (op, mode)
3812 enum machine_mode mode;
3814 if (mode != VOIDmode && mode != GET_MODE (op))
3816 if (GET_CODE (op) == CONST_DOUBLE)
3818 return general_operand (op, mode);
3821 /* Match an SI or HImode register for a zero_extract. */
3824 ext_register_operand (op, mode)
3826 enum machine_mode mode ATTRIBUTE_UNUSED;
3829 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3830 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3833 if (!register_operand (op, VOIDmode))
3836 /* Be careful to accept only registers having upper parts. */
3837 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3838 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3841 /* Return 1 if this is a valid binary floating-point operation.
3842 OP is the expression matched, and MODE is its mode. */
3845 binary_fp_operator (op, mode)
3847 enum machine_mode mode;
3849 if (mode != VOIDmode && mode != GET_MODE (op))
3852 switch (GET_CODE (op))
3858 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3866 mult_operator (op, mode)
3868 enum machine_mode mode ATTRIBUTE_UNUSED;
3870 return GET_CODE (op) == MULT;
3874 div_operator (op, mode)
3876 enum machine_mode mode ATTRIBUTE_UNUSED;
3878 return GET_CODE (op) == DIV;
3882 arith_or_logical_operator (op, mode)
3884 enum machine_mode mode;
3886 return ((mode == VOIDmode || GET_MODE (op) == mode)
3887 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3888 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3891 /* Returns 1 if OP is memory operand with a displacement. */
3894 memory_displacement_operand (op, mode)
3896 enum machine_mode mode;
3898 struct ix86_address parts;
3900 if (! memory_operand (op, mode))
3903 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3906 return parts.disp != NULL_RTX;
3909 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3910 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3912 ??? It seems likely that this will only work because cmpsi is an
3913 expander, and no actual insns use this. */
3916 cmpsi_operand (op, mode)
3918 enum machine_mode mode;
3920 if (nonimmediate_operand (op, mode))
3923 if (GET_CODE (op) == AND
3924 && GET_MODE (op) == SImode
3925 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3926 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3927 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3928 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3929 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3930 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3936 /* Returns 1 if OP is memory operand that can not be represented by the
3940 long_memory_operand (op, mode)
3942 enum machine_mode mode;
3944 if (! memory_operand (op, mode))
3947 return memory_address_length (op) != 0;
3950 /* Return nonzero if the rtx is known aligned. */
3953 aligned_operand (op, mode)
3955 enum machine_mode mode;
3957 struct ix86_address parts;
3959 if (!general_operand (op, mode))
3962 /* Registers and immediate operands are always "aligned". */
3963 if (GET_CODE (op) != MEM)
3966 /* Don't even try to do any aligned optimizations with volatiles. */
3967 if (MEM_VOLATILE_P (op))
3972 /* Pushes and pops are only valid on the stack pointer. */
3973 if (GET_CODE (op) == PRE_DEC
3974 || GET_CODE (op) == POST_INC)
3977 /* Decode the address. */
3978 if (! ix86_decompose_address (op, &parts))
3981 if (parts.base && GET_CODE (parts.base) == SUBREG)
3982 parts.base = SUBREG_REG (parts.base);
3983 if (parts.index && GET_CODE (parts.index) == SUBREG)
3984 parts.index = SUBREG_REG (parts.index);
3986 /* Look for some component that isn't known to be aligned. */
3990 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3995 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4000 if (GET_CODE (parts.disp) != CONST_INT
4001 || (INTVAL (parts.disp) & 3) != 0)
4005 /* Didn't find one -- this must be an aligned address. */
4009 /* Return true if the constant is something that can be loaded with
4010 a special instruction. Only handle 0.0 and 1.0; others are less
4014 standard_80387_constant_p (x)
4017 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4019 /* Note that on the 80387, other constants, such as pi, that we should support
4020 too. On some machines, these are much slower to load as standard constant,
4021 than to load from doubles in memory. */
4022 if (x == CONST0_RTX (GET_MODE (x)))
4024 if (x == CONST1_RTX (GET_MODE (x)))
4029 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4032 standard_sse_constant_p (x)
4035 if (x == const0_rtx)
4037 return (x == CONST0_RTX (GET_MODE (x)));
4040 /* Returns 1 if OP contains a symbol reference */
4043 symbolic_reference_mentioned_p (op)
4046 register const char *fmt;
4049 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4052 fmt = GET_RTX_FORMAT (GET_CODE (op));
4053 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4059 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4060 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4064 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4071 /* Return 1 if it is appropriate to emit `ret' instructions in the
4072 body of a function. Do this only if the epilogue is simple, needing a
4073 couple of insns. Prior to reloading, we can't tell how many registers
4074 must be saved, so return 0 then. Return 0 if there is no frame
4075 marker to de-allocate.
4077 If NON_SAVING_SETJMP is defined and true, then it is not possible
4078 for the epilogue to be simple, so return 0. This is a special case
4079 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4080 until final, but jump_optimize may need to know sooner if a
4084 ix86_can_use_return_insn_p ()
4086 struct ix86_frame frame;
4088 #ifdef NON_SAVING_SETJMP
4089 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4093 if (! reload_completed || frame_pointer_needed)
4096 /* Don't allow more than 32 pop, since that's all we can do
4097 with one instruction. */
4098 if (current_function_pops_args
4099 && current_function_args_size >= 32768)
4102 ix86_compute_frame_layout (&frame);
4103 return frame.to_allocate == 0 && frame.nregs == 0;
4106 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4108 x86_64_sign_extended_value (value)
4111 switch (GET_CODE (value))
4113 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4114 to be at least 32 and this all acceptable constants are
4115 represented as CONST_INT. */
4117 if (HOST_BITS_PER_WIDE_INT == 32)
4121 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4122 return trunc_int_for_mode (val, SImode) == val;
4126 /* For certain code models, the symbolic references are known to fit.
4127 in CM_SMALL_PIC model we know it fits if it is local to the shared
4128 library. Don't count TLS SYMBOL_REFs here, since they should fit
4129 only if inside of UNSPEC handled below. */
4131 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4133 /* For certain code models, the code is near as well. */
4135 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4136 || ix86_cmodel == CM_KERNEL);
4138 /* We also may accept the offsetted memory references in certain special
4141 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4142 switch (XINT (XEXP (value, 0), 1))
4144 case UNSPEC_GOTPCREL:
4146 case UNSPEC_GOTNTPOFF:
4152 if (GET_CODE (XEXP (value, 0)) == PLUS)
4154 rtx op1 = XEXP (XEXP (value, 0), 0);
4155 rtx op2 = XEXP (XEXP (value, 0), 1);
4156 HOST_WIDE_INT offset;
4158 if (ix86_cmodel == CM_LARGE)
4160 if (GET_CODE (op2) != CONST_INT)
4162 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4163 switch (GET_CODE (op1))
4166 /* For CM_SMALL assume that latest object is 16MB before
4167 end of 31bits boundary. We may also accept pretty
4168 large negative constants knowing that all objects are
4169 in the positive half of address space. */
4170 if (ix86_cmodel == CM_SMALL
4171 && offset < 16*1024*1024
4172 && trunc_int_for_mode (offset, SImode) == offset)
4174 /* For CM_KERNEL we know that all object resist in the
4175 negative half of 32bits address space. We may not
4176 accept negative offsets, since they may be just off
4177 and we may accept pretty large positive ones. */
4178 if (ix86_cmodel == CM_KERNEL
4180 && trunc_int_for_mode (offset, SImode) == offset)
4184 /* These conditions are similar to SYMBOL_REF ones, just the
4185 constraints for code models differ. */
4186 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4187 && offset < 16*1024*1024
4188 && trunc_int_for_mode (offset, SImode) == offset)
4190 if (ix86_cmodel == CM_KERNEL
4192 && trunc_int_for_mode (offset, SImode) == offset)
4196 switch (XINT (op1, 1))
4201 && trunc_int_for_mode (offset, SImode) == offset)
4215 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4217 x86_64_zero_extended_value (value)
4220 switch (GET_CODE (value))
4223 if (HOST_BITS_PER_WIDE_INT == 32)
4224 return (GET_MODE (value) == VOIDmode
4225 && !CONST_DOUBLE_HIGH (value));
4229 if (HOST_BITS_PER_WIDE_INT == 32)
4230 return INTVAL (value) >= 0;
4232 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4235 /* For certain code models, the symbolic references are known to fit. */
4237 return ix86_cmodel == CM_SMALL;
4239 /* For certain code models, the code is near as well. */
4241 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4243 /* We also may accept the offsetted memory references in certain special
4246 if (GET_CODE (XEXP (value, 0)) == PLUS)
4248 rtx op1 = XEXP (XEXP (value, 0), 0);
4249 rtx op2 = XEXP (XEXP (value, 0), 1);
4251 if (ix86_cmodel == CM_LARGE)
4253 switch (GET_CODE (op1))
4257 /* For small code model we may accept pretty large positive
4258 offsets, since one bit is available for free. Negative
4259 offsets are limited by the size of NULL pointer area
4260 specified by the ABI. */
4261 if (ix86_cmodel == CM_SMALL
4262 && GET_CODE (op2) == CONST_INT
4263 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4264 && (trunc_int_for_mode (INTVAL (op2), SImode)
4267 /* ??? For the kernel, we may accept adjustment of
4268 -0x10000000, since we know that it will just convert
4269 negative address space to positive, but perhaps this
4270 is not worthwhile. */
4273 /* These conditions are similar to SYMBOL_REF ones, just the
4274 constraints for code models differ. */
4275 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4276 && GET_CODE (op2) == CONST_INT
4277 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4278 && (trunc_int_for_mode (INTVAL (op2), SImode)
4292 /* Value should be nonzero if functions must have frame pointers.
4293 Zero means the frame pointer need not be set up (and parms may
4294 be accessed via the stack pointer) in functions that seem suitable. */
4297 ix86_frame_pointer_required ()
4299 /* If we accessed previous frames, then the generated code expects
4300 to be able to access the saved ebp value in our frame. */
4301 if (cfun->machine->accesses_prev_frame)
4304 /* Several x86 os'es need a frame pointer for other reasons,
4305 usually pertaining to setjmp. */
4306 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4309 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4310 the frame pointer by default. Turn it back on now if we've not
4311 got a leaf function. */
4312 if (TARGET_OMIT_LEAF_FRAME_POINTER
4313 && (!current_function_is_leaf))
4316 if (current_function_profile)
4322 /* Record that the current function accesses previous call frames. */
4325 ix86_setup_frame_addresses ()
4327 cfun->machine->accesses_prev_frame = 1;
4330 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4331 # define USE_HIDDEN_LINKONCE 1
4333 # define USE_HIDDEN_LINKONCE 0
4336 static int pic_labels_used;
4338 /* Fills in the label name that should be used for a pc thunk for
4339 the given register. */
4342 get_pc_thunk_name (name, regno)
4346 if (USE_HIDDEN_LINKONCE)
4347 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4349 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4353 /* This function generates code for -fpic that loads %ebx with
4354 the return address of the caller and then returns. */
4357 ix86_asm_file_end (file)
4363 for (regno = 0; regno < 8; ++regno)
4367 if (! ((pic_labels_used >> regno) & 1))
4370 get_pc_thunk_name (name, regno);
4372 if (USE_HIDDEN_LINKONCE)
4376 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4378 TREE_PUBLIC (decl) = 1;
4379 TREE_STATIC (decl) = 1;
4380 DECL_ONE_ONLY (decl) = 1;
4382 (*targetm.asm_out.unique_section) (decl, 0);
4383 named_section (decl, NULL, 0);
4385 (*targetm.asm_out.globalize_label) (file, name);
4386 fputs ("\t.hidden\t", file);
4387 assemble_name (file, name);
4389 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4394 ASM_OUTPUT_LABEL (file, name);
4397 xops[0] = gen_rtx_REG (SImode, regno);
4398 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4399 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4400 output_asm_insn ("ret", xops);
4404 /* Emit code for the SET_GOT patterns. */
4407 output_set_got (dest)
4413 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4415 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4417 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4420 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4422 output_asm_insn ("call\t%a2", xops);
4425 /* Output the "canonical" label name ("Lxx$pb") here too. This
4426 is what will be referred to by the Mach-O PIC subsystem. */
4427 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4429 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4430 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4433 output_asm_insn ("pop{l}\t%0", xops);
4438 get_pc_thunk_name (name, REGNO (dest));
4439 pic_labels_used |= 1 << REGNO (dest);
4441 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4442 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4443 output_asm_insn ("call\t%X2", xops);
4446 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4447 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4448 else if (!TARGET_MACHO)
4449 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4454 /* Generate an "push" pattern for input ARG. */
4460 return gen_rtx_SET (VOIDmode,
4462 gen_rtx_PRE_DEC (Pmode,
4463 stack_pointer_rtx)),
4467 /* Return >= 0 if there is an unused call-clobbered register available
4468 for the entire function. */
4471 ix86_select_alt_pic_regnum ()
4473 if (current_function_is_leaf && !current_function_profile)
4476 for (i = 2; i >= 0; --i)
4477 if (!regs_ever_live[i])
4481 return INVALID_REGNUM;
4484 /* Return 1 if we need to save REGNO. */
4486 ix86_save_reg (regno, maybe_eh_return)
4488 int maybe_eh_return;
4490 if (pic_offset_table_rtx
4491 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4492 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4493 || current_function_profile
4494 || current_function_calls_eh_return))
4496 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4501 if (current_function_calls_eh_return && maybe_eh_return)
4506 unsigned test = EH_RETURN_DATA_REGNO (i);
4507 if (test == INVALID_REGNUM)
4514 return (regs_ever_live[regno]
4515 && !call_used_regs[regno]
4516 && !fixed_regs[regno]
4517 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4520 /* Return number of registers to be saved on the stack. */
4528 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4529 if (ix86_save_reg (regno, true))
4534 /* Return the offset between two registers, one to be eliminated, and the other
4535 its replacement, at the start of a routine. */
4538 ix86_initial_elimination_offset (from, to)
4542 struct ix86_frame frame;
4543 ix86_compute_frame_layout (&frame);
4545 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4546 return frame.hard_frame_pointer_offset;
4547 else if (from == FRAME_POINTER_REGNUM
4548 && to == HARD_FRAME_POINTER_REGNUM)
4549 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4552 if (to != STACK_POINTER_REGNUM)
4554 else if (from == ARG_POINTER_REGNUM)
4555 return frame.stack_pointer_offset;
4556 else if (from != FRAME_POINTER_REGNUM)
4559 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4563 /* Fill structure ix86_frame about frame of currently computed function. */
4566 ix86_compute_frame_layout (frame)
4567 struct ix86_frame *frame;
4569 HOST_WIDE_INT total_size;
4570 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4572 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4573 HOST_WIDE_INT size = get_frame_size ();
4575 frame->nregs = ix86_nsaved_regs ();
4578 /* Skip return address and saved base pointer. */
4579 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4581 frame->hard_frame_pointer_offset = offset;
4583 /* Do some sanity checking of stack_alignment_needed and
4584 preferred_alignment, since i386 port is the only using those features
4585 that may break easily. */
4587 if (size && !stack_alignment_needed)
4589 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4591 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4593 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4596 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4597 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4599 /* Register save area */
4600 offset += frame->nregs * UNITS_PER_WORD;
4603 if (ix86_save_varrargs_registers)
4605 offset += X86_64_VARARGS_SIZE;
4606 frame->va_arg_size = X86_64_VARARGS_SIZE;
4609 frame->va_arg_size = 0;
4611 /* Align start of frame for local function. */
4612 frame->padding1 = ((offset + stack_alignment_needed - 1)
4613 & -stack_alignment_needed) - offset;
4615 offset += frame->padding1;
4617 /* Frame pointer points here. */
4618 frame->frame_pointer_offset = offset;
4622 /* Add outgoing arguments area. Can be skipped if we eliminated
4623 all the function calls as dead code. */
4624 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4626 offset += current_function_outgoing_args_size;
4627 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4630 frame->outgoing_arguments_size = 0;
4632 /* Align stack boundary. Only needed if we're calling another function
4634 if (!current_function_is_leaf || current_function_calls_alloca)
4635 frame->padding2 = ((offset + preferred_alignment - 1)
4636 & -preferred_alignment) - offset;
4638 frame->padding2 = 0;
4640 offset += frame->padding2;
4642 /* We've reached end of stack frame. */
4643 frame->stack_pointer_offset = offset;
4645 /* Size prologue needs to allocate. */
4646 frame->to_allocate =
4647 (size + frame->padding1 + frame->padding2
4648 + frame->outgoing_arguments_size + frame->va_arg_size);
4650 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4651 && current_function_is_leaf)
4653 frame->red_zone_size = frame->to_allocate;
4654 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4655 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4658 frame->red_zone_size = 0;
4659 frame->to_allocate -= frame->red_zone_size;
4660 frame->stack_pointer_offset -= frame->red_zone_size;
4662 fprintf (stderr, "nregs: %i\n", frame->nregs);
4663 fprintf (stderr, "size: %i\n", size);
4664 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4665 fprintf (stderr, "padding1: %i\n", frame->padding1);
4666 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4667 fprintf (stderr, "padding2: %i\n", frame->padding2);
4668 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4669 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4670 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4671 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4672 frame->hard_frame_pointer_offset);
4673 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4677 /* Emit code to save registers in the prologue. */
4680 ix86_emit_save_regs ()
4685 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4686 if (ix86_save_reg (regno, true))
4688 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4689 RTX_FRAME_RELATED_P (insn) = 1;
4693 /* Emit code to save registers using MOV insns. First register
4694 is restored from POINTER + OFFSET. */
4696 ix86_emit_save_regs_using_mov (pointer, offset)
4698 HOST_WIDE_INT offset;
4703 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4704 if (ix86_save_reg (regno, true))
4706 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4708 gen_rtx_REG (Pmode, regno));
4709 RTX_FRAME_RELATED_P (insn) = 1;
4710 offset += UNITS_PER_WORD;
4714 /* Expand the prologue into a bunch of separate insns. */
4717 ix86_expand_prologue ()
4721 struct ix86_frame frame;
4723 HOST_WIDE_INT allocate;
4725 ix86_compute_frame_layout (&frame);
4728 int count = frame.nregs;
4730 /* The fast prologue uses move instead of push to save registers. This
4731 is significantly longer, but also executes faster as modern hardware
4732 can execute the moves in parallel, but can't do that for push/pop.
4734 Be careful about choosing what prologue to emit: When function takes
4735 many instructions to execute we may use slow version as well as in
4736 case function is known to be outside hot spot (this is known with
4737 feedback only). Weight the size of function by number of registers
4738 to save as it is cheap to use one or two push instructions but very
4739 slow to use many of them. */
4741 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4742 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4743 || (flag_branch_probabilities
4744 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4745 use_fast_prologue_epilogue = 0;
4747 use_fast_prologue_epilogue = !expensive_function_p (count);
4748 if (TARGET_PROLOGUE_USING_MOVE)
4749 use_mov = use_fast_prologue_epilogue;
4752 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4753 slower on all targets. Also sdb doesn't like it. */
4755 if (frame_pointer_needed)
4757 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4758 RTX_FRAME_RELATED_P (insn) = 1;
4760 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4761 RTX_FRAME_RELATED_P (insn) = 1;
4764 allocate = frame.to_allocate;
4765 /* In case we are dealing only with single register and empty frame,
4766 push is equivalent of the mov+add sequence. */
4767 if (allocate == 0 && frame.nregs <= 1)
4771 ix86_emit_save_regs ();
4773 allocate += frame.nregs * UNITS_PER_WORD;
4777 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4779 insn = emit_insn (gen_pro_epilogue_adjust_stack
4780 (stack_pointer_rtx, stack_pointer_rtx,
4781 GEN_INT (-allocate)));
4782 RTX_FRAME_RELATED_P (insn) = 1;
4786 /* ??? Is this only valid for Win32? */
4793 arg0 = gen_rtx_REG (SImode, 0);
4794 emit_move_insn (arg0, GEN_INT (allocate));
4796 sym = gen_rtx_MEM (FUNCTION_MODE,
4797 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4798 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4800 CALL_INSN_FUNCTION_USAGE (insn)
4801 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4802 CALL_INSN_FUNCTION_USAGE (insn));
4804 /* Don't allow scheduling pass to move insns across __alloca
4806 emit_insn (gen_blockage (const0_rtx));
4810 if (!frame_pointer_needed || !frame.to_allocate)
4811 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4813 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4814 -frame.nregs * UNITS_PER_WORD);
4817 #ifdef SUBTARGET_PROLOGUE
4821 pic_reg_used = false;
4822 if (pic_offset_table_rtx
4823 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4824 || current_function_profile))
4826 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4828 if (alt_pic_reg_used != INVALID_REGNUM)
4829 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4831 pic_reg_used = true;
4836 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4838 /* Even with accurate pre-reload life analysis, we can wind up
4839 deleting all references to the pic register after reload.
4840 Consider if cross-jumping unifies two sides of a branch
4841 controlled by a comparison vs the only read from a global.
4842 In which case, allow the set_got to be deleted, though we're
4843 too late to do anything about the ebx save in the prologue. */
4844 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4847 /* Prevent function calls from be scheduled before the call to mcount.
4848 In the pic_reg_used case, make sure that the got load isn't deleted. */
4849 if (current_function_profile)
4850 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4853 /* Emit code to restore saved registers using MOV insns. First register
4854 is restored from POINTER + OFFSET. */
4856 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4859 int maybe_eh_return;
4863 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4864 if (ix86_save_reg (regno, maybe_eh_return))
4866 emit_move_insn (gen_rtx_REG (Pmode, regno),
4867 adjust_address (gen_rtx_MEM (Pmode, pointer),
4869 offset += UNITS_PER_WORD;
4873 /* Restore function stack, frame, and registers. */
4876 ix86_expand_epilogue (style)
4880 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4881 struct ix86_frame frame;
4882 HOST_WIDE_INT offset;
4884 ix86_compute_frame_layout (&frame);
4886 /* Calculate start of saved registers relative to ebp. Special care
4887 must be taken for the normal return case of a function using
4888 eh_return: the eax and edx registers are marked as saved, but not
4889 restored along this path. */
4890 offset = frame.nregs;
4891 if (current_function_calls_eh_return && style != 2)
4893 offset *= -UNITS_PER_WORD;
4895 /* If we're only restoring one register and sp is not valid then
4896 using a move instruction to restore the register since it's
4897 less work than reloading sp and popping the register.
4899 The default code result in stack adjustment using add/lea instruction,
4900 while this code results in LEAVE instruction (or discrete equivalent),
4901 so it is profitable in some other cases as well. Especially when there
4902 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4903 and there is exactly one register to pop. This heuristic may need some
4904 tuning in future. */
4905 if ((!sp_valid && frame.nregs <= 1)
4906 || (TARGET_EPILOGUE_USING_MOVE
4907 && use_fast_prologue_epilogue
4908 && (frame.nregs > 1 || frame.to_allocate))
4909 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4910 || (frame_pointer_needed && TARGET_USE_LEAVE
4911 && use_fast_prologue_epilogue && frame.nregs == 1)
4912 || current_function_calls_eh_return)
4914 /* Restore registers. We can use ebp or esp to address the memory
4915 locations. If both are available, default to ebp, since offsets
4916 are known to be small. Only exception is esp pointing directly to the
4917 end of block of saved registers, where we may simplify addressing
4920 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4921 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4922 frame.to_allocate, style == 2);
4924 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4925 offset, style == 2);
4927 /* eh_return epilogues need %ecx added to the stack pointer. */
4930 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4932 if (frame_pointer_needed)
4934 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4935 tmp = plus_constant (tmp, UNITS_PER_WORD);
4936 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4938 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4939 emit_move_insn (hard_frame_pointer_rtx, tmp);
4941 emit_insn (gen_pro_epilogue_adjust_stack
4942 (stack_pointer_rtx, sa, const0_rtx));
4946 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4947 tmp = plus_constant (tmp, (frame.to_allocate
4948 + frame.nregs * UNITS_PER_WORD));
4949 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4952 else if (!frame_pointer_needed)
4953 emit_insn (gen_pro_epilogue_adjust_stack
4954 (stack_pointer_rtx, stack_pointer_rtx,
4955 GEN_INT (frame.to_allocate
4956 + frame.nregs * UNITS_PER_WORD)));
4957 /* If not an i386, mov & pop is faster than "leave". */
4958 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4959 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4962 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4963 hard_frame_pointer_rtx,
4966 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4968 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4973 /* First step is to deallocate the stack frame so that we can
4974 pop the registers. */
4977 if (!frame_pointer_needed)
4979 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4980 hard_frame_pointer_rtx,
4983 else if (frame.to_allocate)
4984 emit_insn (gen_pro_epilogue_adjust_stack
4985 (stack_pointer_rtx, stack_pointer_rtx,
4986 GEN_INT (frame.to_allocate)));
4988 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4989 if (ix86_save_reg (regno, false))
4992 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4994 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4996 if (frame_pointer_needed)
4998 /* Leave results in shorter dependency chains on CPUs that are
4999 able to grok it fast. */
5000 if (TARGET_USE_LEAVE)
5001 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5002 else if (TARGET_64BIT)
5003 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5005 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5009 /* Sibcall epilogues don't want a return instruction. */
5013 if (current_function_pops_args && current_function_args_size)
5015 rtx popc = GEN_INT (current_function_pops_args);
5017 /* i386 can only pop 64K bytes. If asked to pop more, pop
5018 return address, do explicit add, and jump indirectly to the
5021 if (current_function_pops_args >= 65536)
5023 rtx ecx = gen_rtx_REG (SImode, 2);
5025 /* There are is no "pascal" calling convention in 64bit ABI. */
5029 emit_insn (gen_popsi1 (ecx));
5030 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5031 emit_jump_insn (gen_return_indirect_internal (ecx));
5034 emit_jump_insn (gen_return_pop_internal (popc));
5037 emit_jump_insn (gen_return_internal ());
5040 /* Reset from the function's potential modifications. */
5043 ix86_output_function_epilogue (file, size)
5044 FILE *file ATTRIBUTE_UNUSED;
5045 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5047 if (pic_offset_table_rtx)
5048 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5051 /* Extract the parts of an RTL expression that is a valid memory address
5052 for an instruction. Return 0 if the structure of the address is
5053 grossly off. Return -1 if the address contains ASHIFT, so it is not
5054 strictly valid, but still used for computing length of lea instruction.
5058 ix86_decompose_address (addr, out)
5060 struct ix86_address *out;
5062 rtx base = NULL_RTX;
5063 rtx index = NULL_RTX;
5064 rtx disp = NULL_RTX;
5065 HOST_WIDE_INT scale = 1;
5066 rtx scale_rtx = NULL_RTX;
5069 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5071 else if (GET_CODE (addr) == PLUS)
5073 rtx op0 = XEXP (addr, 0);
5074 rtx op1 = XEXP (addr, 1);
5075 enum rtx_code code0 = GET_CODE (op0);
5076 enum rtx_code code1 = GET_CODE (op1);
5078 if (code0 == REG || code0 == SUBREG)
5080 if (code1 == REG || code1 == SUBREG)
5081 index = op0, base = op1; /* index + base */
5083 base = op0, disp = op1; /* base + displacement */
5085 else if (code0 == MULT)
5087 index = XEXP (op0, 0);
5088 scale_rtx = XEXP (op0, 1);
5089 if (code1 == REG || code1 == SUBREG)
5090 base = op1; /* index*scale + base */
5092 disp = op1; /* index*scale + disp */
5094 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5096 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5097 scale_rtx = XEXP (XEXP (op0, 0), 1);
5098 base = XEXP (op0, 1);
5101 else if (code0 == PLUS)
5103 index = XEXP (op0, 0); /* index + base + disp */
5104 base = XEXP (op0, 1);
5110 else if (GET_CODE (addr) == MULT)
5112 index = XEXP (addr, 0); /* index*scale */
5113 scale_rtx = XEXP (addr, 1);
5115 else if (GET_CODE (addr) == ASHIFT)
5119 /* We're called for lea too, which implements ashift on occasion. */
5120 index = XEXP (addr, 0);
5121 tmp = XEXP (addr, 1);
5122 if (GET_CODE (tmp) != CONST_INT)
5124 scale = INTVAL (tmp);
5125 if ((unsigned HOST_WIDE_INT) scale > 3)
5131 disp = addr; /* displacement */
5133 /* Extract the integral value of scale. */
5136 if (GET_CODE (scale_rtx) != CONST_INT)
5138 scale = INTVAL (scale_rtx);
5141 /* Allow arg pointer and stack pointer as index if there is not scaling */
5142 if (base && index && scale == 1
5143 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5144 || index == stack_pointer_rtx))
5151 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5152 if ((base == hard_frame_pointer_rtx
5153 || base == frame_pointer_rtx
5154 || base == arg_pointer_rtx) && !disp)
5157 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5158 Avoid this by transforming to [%esi+0]. */
5159 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5160 && base && !index && !disp
5162 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5165 /* Special case: encode reg+reg instead of reg*2. */
5166 if (!base && index && scale && scale == 2)
5167 base = index, scale = 1;
5169 /* Special case: scaling cannot be encoded without base or displacement. */
5170 if (!base && !disp && index && scale != 1)
5181 /* Return cost of the memory address x.
5182 For i386, it is better to use a complex address than let gcc copy
5183 the address into a reg and make a new pseudo. But not if the address
5184 requires to two regs - that would mean more pseudos with longer
5187 ix86_address_cost (x)
5190 struct ix86_address parts;
5193 if (!ix86_decompose_address (x, &parts))
5196 if (parts.base && GET_CODE (parts.base) == SUBREG)
5197 parts.base = SUBREG_REG (parts.base);
5198 if (parts.index && GET_CODE (parts.index) == SUBREG)
5199 parts.index = SUBREG_REG (parts.index);
5201 /* More complex memory references are better. */
5202 if (parts.disp && parts.disp != const0_rtx)
5205 /* Attempt to minimize number of registers in the address. */
5207 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5209 && (!REG_P (parts.index)
5210 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5214 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5216 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5217 && parts.base != parts.index)
5220 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5221 since it's predecode logic can't detect the length of instructions
5222 and it degenerates to vector decoded. Increase cost of such
5223 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5224 to split such addresses or even refuse such addresses at all.
5226 Following addressing modes are affected:
5231 The first and last case may be avoidable by explicitly coding the zero in
5232 memory address, but I don't have AMD-K6 machine handy to check this
5236 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5237 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5238 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5244 /* If X is a machine specific address (i.e. a symbol or label being
5245 referenced as a displacement from the GOT implemented using an
5246 UNSPEC), then return the base term. Otherwise return X. */
5249 ix86_find_base_term (x)
5256 if (GET_CODE (x) != CONST)
5259 if (GET_CODE (term) == PLUS
5260 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5261 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5262 term = XEXP (term, 0);
5263 if (GET_CODE (term) != UNSPEC
5264 || XINT (term, 1) != UNSPEC_GOTPCREL)
5267 term = XVECEXP (term, 0, 0);
5269 if (GET_CODE (term) != SYMBOL_REF
5270 && GET_CODE (term) != LABEL_REF)
5276 if (GET_CODE (x) != PLUS
5277 || XEXP (x, 0) != pic_offset_table_rtx
5278 || GET_CODE (XEXP (x, 1)) != CONST)
5281 term = XEXP (XEXP (x, 1), 0);
5283 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5284 term = XEXP (term, 0);
5286 if (GET_CODE (term) != UNSPEC
5287 || XINT (term, 1) != UNSPEC_GOTOFF)
5290 term = XVECEXP (term, 0, 0);
5292 if (GET_CODE (term) != SYMBOL_REF
5293 && GET_CODE (term) != LABEL_REF)
5299 /* Determine if a given RTX is a valid constant. We already know this
5300 satisfies CONSTANT_P. */
5303 legitimate_constant_p (x)
5308 switch (GET_CODE (x))
5311 /* TLS symbols are not constant. */
5312 if (tls_symbolic_operand (x, Pmode))
5317 inner = XEXP (x, 0);
5319 /* Offsets of TLS symbols are never valid.
5320 Discourage CSE from creating them. */
5321 if (GET_CODE (inner) == PLUS
5322 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5325 /* Only some unspecs are valid as "constants". */
5326 if (GET_CODE (inner) == UNSPEC)
5327 switch (XINT (inner, 1))
5330 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5340 /* Otherwise we handle everything else in the move patterns. */
5344 /* Determine if it's legal to put X into the constant pool. This
5345 is not possible for the address of thread-local symbols, which
5346 is checked above. */
5349 ix86_cannot_force_const_mem (x)
5352 return !legitimate_constant_p (x);
5355 /* Determine if a given RTX is a valid constant address. */
5358 constant_address_p (x)
5361 switch (GET_CODE (x))
5368 return TARGET_64BIT;
5371 /* For Mach-O, really believe the CONST. */
5374 /* Otherwise fall through. */
5376 return !flag_pic && legitimate_constant_p (x);
5383 /* Nonzero if the constant value X is a legitimate general operand
5384 when generating PIC code. It is given that flag_pic is on and
5385 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5388 legitimate_pic_operand_p (x)
5393 switch (GET_CODE (x))
5396 inner = XEXP (x, 0);
5398 /* Only some unspecs are valid as "constants". */
5399 if (GET_CODE (inner) == UNSPEC)
5400 switch (XINT (inner, 1))
5403 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5411 return legitimate_pic_address_disp_p (x);
5418 /* Determine if a given CONST RTX is a valid memory displacement
5422 legitimate_pic_address_disp_p (disp)
5427 /* In 64bit mode we can allow direct addresses of symbols and labels
5428 when they are not dynamic symbols. */
5431 /* TLS references should always be enclosed in UNSPEC. */
5432 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5434 if (GET_CODE (disp) == SYMBOL_REF
5435 && ix86_cmodel == CM_SMALL_PIC
5436 && (CONSTANT_POOL_ADDRESS_P (disp)
5437 || SYMBOL_REF_FLAG (disp)))
5439 if (GET_CODE (disp) == LABEL_REF)
5441 if (GET_CODE (disp) == CONST
5442 && GET_CODE (XEXP (disp, 0)) == PLUS
5443 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5444 && ix86_cmodel == CM_SMALL_PIC
5445 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5446 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5447 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5448 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5449 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5450 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5453 if (GET_CODE (disp) != CONST)
5455 disp = XEXP (disp, 0);
5459 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5460 of GOT tables. We should not need these anyway. */
5461 if (GET_CODE (disp) != UNSPEC
5462 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5465 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5466 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5472 if (GET_CODE (disp) == PLUS)
5474 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5476 disp = XEXP (disp, 0);
5480 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5481 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5483 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5484 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5485 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5487 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5488 if (strstr (sym_name, "$pb") != 0)
5493 if (GET_CODE (disp) != UNSPEC)
5496 switch (XINT (disp, 1))
5501 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5503 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5504 case UNSPEC_GOTTPOFF:
5505 case UNSPEC_GOTNTPOFF:
5506 case UNSPEC_INDNTPOFF:
5509 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5511 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5513 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5519 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5520 memory address for an instruction. The MODE argument is the machine mode
5521 for the MEM expression that wants to use this address.
5523 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5524 convert common non-canonical forms to canonical form so that they will
5528 legitimate_address_p (mode, addr, strict)
5529 enum machine_mode mode;
5533 struct ix86_address parts;
5534 rtx base, index, disp;
5535 HOST_WIDE_INT scale;
5536 const char *reason = NULL;
5537 rtx reason_rtx = NULL_RTX;
5539 if (TARGET_DEBUG_ADDR)
5542 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5543 GET_MODE_NAME (mode), strict);
5547 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5549 if (TARGET_DEBUG_ADDR)
5550 fprintf (stderr, "Success.\n");
5554 if (ix86_decompose_address (addr, &parts) <= 0)
5556 reason = "decomposition failed";
5561 index = parts.index;
5563 scale = parts.scale;
5565 /* Validate base register.
5567 Don't allow SUBREG's here, it can lead to spill failures when the base
5568 is one word out of a two word structure, which is represented internally
5576 if (GET_CODE (base) == SUBREG)
5577 reg = SUBREG_REG (base);
5581 if (GET_CODE (reg) != REG)
5583 reason = "base is not a register";
5587 if (GET_MODE (base) != Pmode)
5589 reason = "base is not in Pmode";
5593 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5594 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5596 reason = "base is not valid";
5601 /* Validate index register.
5603 Don't allow SUBREG's here, it can lead to spill failures when the index
5604 is one word out of a two word structure, which is represented internally
5612 if (GET_CODE (index) == SUBREG)
5613 reg = SUBREG_REG (index);
5617 if (GET_CODE (reg) != REG)
5619 reason = "index is not a register";
5623 if (GET_MODE (index) != Pmode)
5625 reason = "index is not in Pmode";
5629 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5630 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5632 reason = "index is not valid";
5637 /* Validate scale factor. */
5640 reason_rtx = GEN_INT (scale);
5643 reason = "scale without index";
5647 if (scale != 2 && scale != 4 && scale != 8)
5649 reason = "scale is not a valid multiplier";
5654 /* Validate displacement. */
5659 if (GET_CODE (disp) == CONST
5660 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5661 switch (XINT (XEXP (disp, 0), 1))
5665 case UNSPEC_GOTPCREL:
5668 goto is_legitimate_pic;
5670 case UNSPEC_GOTTPOFF:
5671 case UNSPEC_GOTNTPOFF:
5672 case UNSPEC_INDNTPOFF:
5678 reason = "invalid address unspec";
5682 else if (flag_pic && (SYMBOLIC_CONST (disp)
5684 && !machopic_operand_p (disp)
5689 if (TARGET_64BIT && (index || base))
5691 /* foo@dtpoff(%rX) is ok. */
5692 if (GET_CODE (disp) != CONST
5693 || GET_CODE (XEXP (disp, 0)) != PLUS
5694 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5695 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5696 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5697 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5699 reason = "non-constant pic memory reference";
5703 else if (! legitimate_pic_address_disp_p (disp))
5705 reason = "displacement is an invalid pic construct";
5709 /* This code used to verify that a symbolic pic displacement
5710 includes the pic_offset_table_rtx register.
5712 While this is good idea, unfortunately these constructs may
5713 be created by "adds using lea" optimization for incorrect
5722 This code is nonsensical, but results in addressing
5723 GOT table with pic_offset_table_rtx base. We can't
5724 just refuse it easily, since it gets matched by
5725 "addsi3" pattern, that later gets split to lea in the
5726 case output register differs from input. While this
5727 can be handled by separate addsi pattern for this case
5728 that never results in lea, this seems to be easier and
5729 correct fix for crash to disable this test. */
5731 else if (!CONSTANT_ADDRESS_P (disp))
5733 reason = "displacement is not constant";
5736 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5738 reason = "displacement is out of range";
5741 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5743 reason = "displacement is a const_double";
5748 /* Everything looks valid. */
5749 if (TARGET_DEBUG_ADDR)
5750 fprintf (stderr, "Success.\n");
5754 if (TARGET_DEBUG_ADDR)
5756 fprintf (stderr, "Error: %s\n", reason);
5757 debug_rtx (reason_rtx);
5762 /* Return an unique alias set for the GOT. */
5764 static HOST_WIDE_INT
5765 ix86_GOT_alias_set ()
5767 static HOST_WIDE_INT set = -1;
5769 set = new_alias_set ();
5773 /* Return a legitimate reference for ORIG (an address) using the
5774 register REG. If REG is 0, a new pseudo is generated.
5776 There are two types of references that must be handled:
5778 1. Global data references must load the address from the GOT, via
5779 the PIC reg. An insn is emitted to do this load, and the reg is
5782 2. Static data references, constant pool addresses, and code labels
5783 compute the address as an offset from the GOT, whose base is in
5784 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5785 differentiate them from global data objects. The returned
5786 address is the PIC reg + an unspec constant.
5788 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5789 reg also appears in the address. */
5792 legitimize_pic_address (orig, reg)
5802 reg = gen_reg_rtx (Pmode);
5803 /* Use the generic Mach-O PIC machinery. */
5804 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5807 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5809 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5811 /* This symbol may be referenced via a displacement from the PIC
5812 base address (@GOTOFF). */
5814 if (reload_in_progress)
5815 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5816 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5817 new = gen_rtx_CONST (Pmode, new);
5818 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5822 emit_move_insn (reg, new);
5826 else if (GET_CODE (addr) == SYMBOL_REF)
5830 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5831 new = gen_rtx_CONST (Pmode, new);
5832 new = gen_rtx_MEM (Pmode, new);
5833 RTX_UNCHANGING_P (new) = 1;
5834 set_mem_alias_set (new, ix86_GOT_alias_set ());
5837 reg = gen_reg_rtx (Pmode);
5838 /* Use directly gen_movsi, otherwise the address is loaded
5839 into register for CSE. We don't want to CSE this addresses,
5840 instead we CSE addresses from the GOT table, so skip this. */
5841 emit_insn (gen_movsi (reg, new));
5846 /* This symbol must be referenced via a load from the
5847 Global Offset Table (@GOT). */
5849 if (reload_in_progress)
5850 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5851 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5852 new = gen_rtx_CONST (Pmode, new);
5853 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5854 new = gen_rtx_MEM (Pmode, new);
5855 RTX_UNCHANGING_P (new) = 1;
5856 set_mem_alias_set (new, ix86_GOT_alias_set ());
5859 reg = gen_reg_rtx (Pmode);
5860 emit_move_insn (reg, new);
5866 if (GET_CODE (addr) == CONST)
5868 addr = XEXP (addr, 0);
5870 /* We must match stuff we generate before. Assume the only
5871 unspecs that can get here are ours. Not that we could do
5872 anything with them anyway... */
5873 if (GET_CODE (addr) == UNSPEC
5874 || (GET_CODE (addr) == PLUS
5875 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5877 if (GET_CODE (addr) != PLUS)
5880 if (GET_CODE (addr) == PLUS)
5882 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5884 /* Check first to see if this is a constant offset from a @GOTOFF
5885 symbol reference. */
5886 if (local_symbolic_operand (op0, Pmode)
5887 && GET_CODE (op1) == CONST_INT)
5891 if (reload_in_progress)
5892 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5893 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5895 new = gen_rtx_PLUS (Pmode, new, op1);
5896 new = gen_rtx_CONST (Pmode, new);
5897 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5901 emit_move_insn (reg, new);
5907 if (INTVAL (op1) < -16*1024*1024
5908 || INTVAL (op1) >= 16*1024*1024)
5909 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5914 base = legitimize_pic_address (XEXP (addr, 0), reg);
5915 new = legitimize_pic_address (XEXP (addr, 1),
5916 base == reg ? NULL_RTX : reg);
5918 if (GET_CODE (new) == CONST_INT)
5919 new = plus_constant (base, INTVAL (new));
5922 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5924 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5925 new = XEXP (new, 1);
5927 new = gen_rtx_PLUS (Pmode, base, new);
5936 ix86_encode_section_info (decl, first)
5938 int first ATTRIBUTE_UNUSED;
5940 bool local_p = (*targetm.binds_local_p) (decl);
5943 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5944 if (GET_CODE (rtl) != MEM)
5946 symbol = XEXP (rtl, 0);
5947 if (GET_CODE (symbol) != SYMBOL_REF)
5950 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5951 symbol so that we may access it directly in the GOT. */
5954 SYMBOL_REF_FLAG (symbol) = local_p;
5956 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5957 "local dynamic", "initial exec" or "local exec" TLS models
5960 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5962 const char *symbol_str;
5965 enum tls_model kind = decl_tls_model (decl);
5967 if (TARGET_64BIT && ! flag_pic)
5969 /* x86-64 doesn't allow non-pic code for shared libraries,
5970 so don't generate GD/LD TLS models for non-pic code. */
5973 case TLS_MODEL_GLOBAL_DYNAMIC:
5974 kind = TLS_MODEL_INITIAL_EXEC; break;
5975 case TLS_MODEL_LOCAL_DYNAMIC:
5976 kind = TLS_MODEL_LOCAL_EXEC; break;
5982 symbol_str = XSTR (symbol, 0);
5984 if (symbol_str[0] == '%')
5986 if (symbol_str[1] == tls_model_chars[kind])
5990 len = strlen (symbol_str) + 1;
5991 newstr = alloca (len + 2);
5994 newstr[1] = tls_model_chars[kind];
5995 memcpy (newstr + 2, symbol_str, len);
5997 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6001 /* Undo the above when printing symbol names. */
6004 ix86_strip_name_encoding (str)
6014 /* Load the thread pointer into a register. */
6017 get_thread_pointer ()
6021 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6022 tp = gen_rtx_MEM (Pmode, tp);
6023 RTX_UNCHANGING_P (tp) = 1;
6024 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6025 tp = force_reg (Pmode, tp);
6030 /* Try machine-dependent ways of modifying an illegitimate address
6031 to be legitimate. If we find one, return the new, valid address.
6032 This macro is used in only one place: `memory_address' in explow.c.
6034 OLDX is the address as it was before break_out_memory_refs was called.
6035 In some cases it is useful to look at this to decide what needs to be done.
6037 MODE and WIN are passed so that this macro can use
6038 GO_IF_LEGITIMATE_ADDRESS.
6040 It is always safe for this macro to do nothing. It exists to recognize
6041 opportunities to optimize the output.
6043 For the 80386, we handle X+REG by loading X into a register R and
6044 using R+REG. R will go in a general reg and indexing will be used.
6045 However, if REG is a broken-out memory address or multiplication,
6046 nothing needs to be done because REG can certainly go in a general reg.
6048 When -fpic is used, special handling is needed for symbolic references.
6049 See comments by legitimize_pic_address in i386.c for details. */
6052 legitimize_address (x, oldx, mode)
6054 register rtx oldx ATTRIBUTE_UNUSED;
6055 enum machine_mode mode;
6060 if (TARGET_DEBUG_ADDR)
6062 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6063 GET_MODE_NAME (mode));
6067 log = tls_symbolic_operand (x, mode);
6070 rtx dest, base, off, pic;
6075 case TLS_MODEL_GLOBAL_DYNAMIC:
6076 dest = gen_reg_rtx (Pmode);
6079 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6082 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6083 insns = get_insns ();
6086 emit_libcall_block (insns, dest, rax, x);
6089 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6092 case TLS_MODEL_LOCAL_DYNAMIC:
6093 base = gen_reg_rtx (Pmode);
6096 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6099 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6100 insns = get_insns ();
6103 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6104 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6105 emit_libcall_block (insns, base, rax, note);
6108 emit_insn (gen_tls_local_dynamic_base_32 (base));
6110 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6111 off = gen_rtx_CONST (Pmode, off);
6113 return gen_rtx_PLUS (Pmode, base, off);
6115 case TLS_MODEL_INITIAL_EXEC:
6119 type = UNSPEC_GOTNTPOFF;
6123 if (reload_in_progress)
6124 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6125 pic = pic_offset_table_rtx;
6126 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6128 else if (!TARGET_GNU_TLS)
6130 pic = gen_reg_rtx (Pmode);
6131 emit_insn (gen_set_got (pic));
6132 type = UNSPEC_GOTTPOFF;
6137 type = UNSPEC_INDNTPOFF;
6140 base = get_thread_pointer ();
6142 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6143 off = gen_rtx_CONST (Pmode, off);
6145 off = gen_rtx_PLUS (Pmode, pic, off);
6146 off = gen_rtx_MEM (Pmode, off);
6147 RTX_UNCHANGING_P (off) = 1;
6148 set_mem_alias_set (off, ix86_GOT_alias_set ());
6149 dest = gen_reg_rtx (Pmode);
6151 if (TARGET_64BIT || TARGET_GNU_TLS)
6153 emit_move_insn (dest, off);
6154 return gen_rtx_PLUS (Pmode, base, dest);
6157 emit_insn (gen_subsi3 (dest, base, off));
6160 case TLS_MODEL_LOCAL_EXEC:
6161 base = get_thread_pointer ();
6163 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6164 (TARGET_64BIT || TARGET_GNU_TLS)
6165 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6166 off = gen_rtx_CONST (Pmode, off);
6168 if (TARGET_64BIT || TARGET_GNU_TLS)
6169 return gen_rtx_PLUS (Pmode, base, off);
6172 dest = gen_reg_rtx (Pmode);
6173 emit_insn (gen_subsi3 (dest, base, off));
6184 if (flag_pic && SYMBOLIC_CONST (x))
6185 return legitimize_pic_address (x, 0);
6187 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6188 if (GET_CODE (x) == ASHIFT
6189 && GET_CODE (XEXP (x, 1)) == CONST_INT
6190 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6193 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6194 GEN_INT (1 << log));
6197 if (GET_CODE (x) == PLUS)
6199 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6201 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6202 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6203 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6206 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6207 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6208 GEN_INT (1 << log));
6211 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6212 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6213 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6216 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6217 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6218 GEN_INT (1 << log));
6221 /* Put multiply first if it isn't already. */
6222 if (GET_CODE (XEXP (x, 1)) == MULT)
6224 rtx tmp = XEXP (x, 0);
6225 XEXP (x, 0) = XEXP (x, 1);
6230 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6231 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6232 created by virtual register instantiation, register elimination, and
6233 similar optimizations. */
6234 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6237 x = gen_rtx_PLUS (Pmode,
6238 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6239 XEXP (XEXP (x, 1), 0)),
6240 XEXP (XEXP (x, 1), 1));
6244 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6245 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6246 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6247 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6248 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6249 && CONSTANT_P (XEXP (x, 1)))
6252 rtx other = NULL_RTX;
6254 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6256 constant = XEXP (x, 1);
6257 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6259 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6261 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6262 other = XEXP (x, 1);
6270 x = gen_rtx_PLUS (Pmode,
6271 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6272 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6273 plus_constant (other, INTVAL (constant)));
6277 if (changed && legitimate_address_p (mode, x, FALSE))
6280 if (GET_CODE (XEXP (x, 0)) == MULT)
6283 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6286 if (GET_CODE (XEXP (x, 1)) == MULT)
6289 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6293 && GET_CODE (XEXP (x, 1)) == REG
6294 && GET_CODE (XEXP (x, 0)) == REG)
6297 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6300 x = legitimize_pic_address (x, 0);
6303 if (changed && legitimate_address_p (mode, x, FALSE))
6306 if (GET_CODE (XEXP (x, 0)) == REG)
6308 register rtx temp = gen_reg_rtx (Pmode);
6309 register rtx val = force_operand (XEXP (x, 1), temp);
6311 emit_move_insn (temp, val);
6317 else if (GET_CODE (XEXP (x, 1)) == REG)
6319 register rtx temp = gen_reg_rtx (Pmode);
6320 register rtx val = force_operand (XEXP (x, 0), temp);
6322 emit_move_insn (temp, val);
6332 /* Print an integer constant expression in assembler syntax. Addition
6333 and subtraction are the only arithmetic that may appear in these
6334 expressions. FILE is the stdio stream to write to, X is the rtx, and
6335 CODE is the operand print code from the output string. */
6338 output_pic_addr_const (file, x, code)
6345 switch (GET_CODE (x))
6355 assemble_name (file, XSTR (x, 0));
6356 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6357 fputs ("@PLT", file);
6364 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6365 assemble_name (asm_out_file, buf);
6369 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6373 /* This used to output parentheses around the expression,
6374 but that does not work on the 386 (either ATT or BSD assembler). */
6375 output_pic_addr_const (file, XEXP (x, 0), code);
6379 if (GET_MODE (x) == VOIDmode)
6381 /* We can use %d if the number is <32 bits and positive. */
6382 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6383 fprintf (file, "0x%lx%08lx",
6384 (unsigned long) CONST_DOUBLE_HIGH (x),
6385 (unsigned long) CONST_DOUBLE_LOW (x));
6387 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6390 /* We can't handle floating point constants;
6391 PRINT_OPERAND must handle them. */
6392 output_operand_lossage ("floating constant misused");
6396 /* Some assemblers need integer constants to appear first. */
6397 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6399 output_pic_addr_const (file, XEXP (x, 0), code);
6401 output_pic_addr_const (file, XEXP (x, 1), code);
6403 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6405 output_pic_addr_const (file, XEXP (x, 1), code);
6407 output_pic_addr_const (file, XEXP (x, 0), code);
6415 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6416 output_pic_addr_const (file, XEXP (x, 0), code);
6418 output_pic_addr_const (file, XEXP (x, 1), code);
6420 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6424 if (XVECLEN (x, 0) != 1)
6426 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6427 switch (XINT (x, 1))
6430 fputs ("@GOT", file);
6433 fputs ("@GOTOFF", file);
6435 case UNSPEC_GOTPCREL:
6436 fputs ("@GOTPCREL(%rip)", file);
6438 case UNSPEC_GOTTPOFF:
6439 /* FIXME: This might be @TPOFF in Sun ld too. */
6440 fputs ("@GOTTPOFF", file);
6443 fputs ("@TPOFF", file);
6447 fputs ("@TPOFF", file);
6449 fputs ("@NTPOFF", file);
6452 fputs ("@DTPOFF", file);
6454 case UNSPEC_GOTNTPOFF:
6456 fputs ("@GOTTPOFF(%rip)", file);
6458 fputs ("@GOTNTPOFF", file);
6460 case UNSPEC_INDNTPOFF:
6461 fputs ("@INDNTPOFF", file);
6464 output_operand_lossage ("invalid UNSPEC as operand");
6470 output_operand_lossage ("invalid expression as operand");
6474 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6475 We need to handle our special PIC relocations. */
6478 i386_dwarf_output_addr_const (file, x)
6483 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6487 fprintf (file, "%s", ASM_LONG);
6490 output_pic_addr_const (file, x, '\0');
6492 output_addr_const (file, x);
6496 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6497 We need to emit DTP-relative relocations. */
6500 i386_output_dwarf_dtprel (file, size, x)
6505 fputs (ASM_LONG, file);
6506 output_addr_const (file, x);
6507 fputs ("@DTPOFF", file);
6513 fputs (", 0", file);
6520 /* In the name of slightly smaller debug output, and to cater to
6521 general assembler losage, recognize PIC+GOTOFF and turn it back
6522 into a direct symbol reference. */
6525 i386_simplify_dwarf_addr (orig_x)
6530 if (GET_CODE (x) == MEM)
6535 if (GET_CODE (x) != CONST
6536 || GET_CODE (XEXP (x, 0)) != UNSPEC
6537 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6538 || GET_CODE (orig_x) != MEM)
6540 return XVECEXP (XEXP (x, 0), 0, 0);
6543 if (GET_CODE (x) != PLUS
6544 || GET_CODE (XEXP (x, 1)) != CONST)
6547 if (GET_CODE (XEXP (x, 0)) == REG
6548 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6549 /* %ebx + GOT/GOTOFF */
6551 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6553 /* %ebx + %reg * scale + GOT/GOTOFF */
6555 if (GET_CODE (XEXP (y, 0)) == REG
6556 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6558 else if (GET_CODE (XEXP (y, 1)) == REG
6559 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6563 if (GET_CODE (y) != REG
6564 && GET_CODE (y) != MULT
6565 && GET_CODE (y) != ASHIFT)
6571 x = XEXP (XEXP (x, 1), 0);
6572 if (GET_CODE (x) == UNSPEC
6573 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6574 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6577 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6578 return XVECEXP (x, 0, 0);
6581 if (GET_CODE (x) == PLUS
6582 && GET_CODE (XEXP (x, 0)) == UNSPEC
6583 && GET_CODE (XEXP (x, 1)) == CONST_INT
6584 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6585 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6586 && GET_CODE (orig_x) != MEM)))
6588 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6590 return gen_rtx_PLUS (Pmode, y, x);
6598 put_condition_code (code, mode, reverse, fp, file)
6600 enum machine_mode mode;
6606 if (mode == CCFPmode || mode == CCFPUmode)
6608 enum rtx_code second_code, bypass_code;
6609 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6610 if (bypass_code != NIL || second_code != NIL)
6612 code = ix86_fp_compare_code_to_integer (code);
6616 code = reverse_condition (code);
6627 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6632 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6633 Those same assemblers have the same but opposite losage on cmov. */
6636 suffix = fp ? "nbe" : "a";
6639 if (mode == CCNOmode || mode == CCGOCmode)
6641 else if (mode == CCmode || mode == CCGCmode)
6652 if (mode == CCNOmode || mode == CCGOCmode)
6654 else if (mode == CCmode || mode == CCGCmode)
6663 suffix = fp ? "nb" : "ae";
6666 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6676 suffix = fp ? "u" : "p";
6679 suffix = fp ? "nu" : "np";
6684 fputs (suffix, file);
6688 print_reg (x, code, file)
6693 if (REGNO (x) == ARG_POINTER_REGNUM
6694 || REGNO (x) == FRAME_POINTER_REGNUM
6695 || REGNO (x) == FLAGS_REG
6696 || REGNO (x) == FPSR_REG)
6699 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6702 if (code == 'w' || MMX_REG_P (x))
6704 else if (code == 'b')
6706 else if (code == 'k')
6708 else if (code == 'q')
6710 else if (code == 'y')
6712 else if (code == 'h')
6715 code = GET_MODE_SIZE (GET_MODE (x));
6717 /* Irritatingly, AMD extended registers use different naming convention
6718 from the normal registers. */
6719 if (REX_INT_REG_P (x))
6726 error ("extended registers have no high halves");
6729 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6732 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6735 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6738 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6741 error ("unsupported operand size for extended register");
6749 if (STACK_TOP_P (x))
6751 fputs ("st(0)", file);
6758 if (! ANY_FP_REG_P (x))
6759 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6763 fputs (hi_reg_name[REGNO (x)], file);
6766 fputs (qi_reg_name[REGNO (x)], file);
6769 fputs (qi_high_reg_name[REGNO (x)], file);
6776 /* Locate some local-dynamic symbol still in use by this function
6777 so that we can print its name in some tls_local_dynamic_base
6781 get_some_local_dynamic_name ()
6785 if (cfun->machine->some_ld_name)
6786 return cfun->machine->some_ld_name;
6788 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6790 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6791 return cfun->machine->some_ld_name;
6797 get_some_local_dynamic_name_1 (px, data)
6799 void *data ATTRIBUTE_UNUSED;
6803 if (GET_CODE (x) == SYMBOL_REF
6804 && local_dynamic_symbolic_operand (x, Pmode))
6806 cfun->machine->some_ld_name = XSTR (x, 0);
6814 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6815 C -- print opcode suffix for set/cmov insn.
6816 c -- like C, but print reversed condition
6817 F,f -- likewise, but for floating-point.
6818 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6820 R -- print the prefix for register names.
6821 z -- print the opcode suffix for the size of the current operand.
6822 * -- print a star (in certain assembler syntax)
6823 A -- print an absolute memory reference.
6824 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6825 s -- print a shift double count, followed by the assemblers argument
6827 b -- print the QImode name of the register for the indicated operand.
6828 %b0 would print %al if operands[0] is reg 0.
6829 w -- likewise, print the HImode name of the register.
6830 k -- likewise, print the SImode name of the register.
6831 q -- likewise, print the DImode name of the register.
6832 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6833 y -- print "st(0)" instead of "st" as a register.
6834 D -- print condition for SSE cmp instruction.
6835 P -- if PIC, print an @PLT suffix.
6836 X -- don't print any sort of PIC '@' suffix for a symbol.
6837 & -- print some in-use local-dynamic symbol name.
6841 print_operand (file, x, code)
6851 if (ASSEMBLER_DIALECT == ASM_ATT)
6856 assemble_name (file, get_some_local_dynamic_name ());
6860 if (ASSEMBLER_DIALECT == ASM_ATT)
6862 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6864 /* Intel syntax. For absolute addresses, registers should not
6865 be surrounded by braces. */
6866 if (GET_CODE (x) != REG)
6869 PRINT_OPERAND (file, x, 0);
6877 PRINT_OPERAND (file, x, 0);
6882 if (ASSEMBLER_DIALECT == ASM_ATT)
6887 if (ASSEMBLER_DIALECT == ASM_ATT)
6892 if (ASSEMBLER_DIALECT == ASM_ATT)
6897 if (ASSEMBLER_DIALECT == ASM_ATT)
6902 if (ASSEMBLER_DIALECT == ASM_ATT)
6907 if (ASSEMBLER_DIALECT == ASM_ATT)
6912 /* 387 opcodes don't get size suffixes if the operands are
6914 if (STACK_REG_P (x))
6917 /* Likewise if using Intel opcodes. */
6918 if (ASSEMBLER_DIALECT == ASM_INTEL)
6921 /* This is the size of op from size of operand. */
6922 switch (GET_MODE_SIZE (GET_MODE (x)))
6925 #ifdef HAVE_GAS_FILDS_FISTS
6931 if (GET_MODE (x) == SFmode)
6946 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6948 #ifdef GAS_MNEMONICS
6974 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6976 PRINT_OPERAND (file, x, 0);
6982 /* Little bit of braindamage here. The SSE compare instructions
6983 does use completely different names for the comparisons that the
6984 fp conditional moves. */
6985 switch (GET_CODE (x))
7000 fputs ("unord", file);
7004 fputs ("neq", file);
7008 fputs ("nlt", file);
7012 fputs ("nle", file);
7015 fputs ("ord", file);
7023 #ifdef CMOV_SUN_AS_SYNTAX
7024 if (ASSEMBLER_DIALECT == ASM_ATT)
7026 switch (GET_MODE (x))
7028 case HImode: putc ('w', file); break;
7030 case SFmode: putc ('l', file); break;
7032 case DFmode: putc ('q', file); break;
7040 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7043 #ifdef CMOV_SUN_AS_SYNTAX
7044 if (ASSEMBLER_DIALECT == ASM_ATT)
7047 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7050 /* Like above, but reverse condition */
7052 /* Check to see if argument to %c is really a constant
7053 and not a condition code which needs to be reversed. */
7054 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7056 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7059 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7062 #ifdef CMOV_SUN_AS_SYNTAX
7063 if (ASSEMBLER_DIALECT == ASM_ATT)
7066 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7072 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7075 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7078 int pred_val = INTVAL (XEXP (x, 0));
7080 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7081 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7083 int taken = pred_val > REG_BR_PROB_BASE / 2;
7084 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7086 /* Emit hints only in the case default branch prediction
7087 heuristics would fail. */
7088 if (taken != cputaken)
7090 /* We use 3e (DS) prefix for taken branches and
7091 2e (CS) prefix for not taken branches. */
7093 fputs ("ds ; ", file);
7095 fputs ("cs ; ", file);
7102 output_operand_lossage ("invalid operand code `%c'", code);
7106 if (GET_CODE (x) == REG)
7108 PRINT_REG (x, code, file);
7111 else if (GET_CODE (x) == MEM)
7113 /* No `byte ptr' prefix for call instructions. */
7114 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7117 switch (GET_MODE_SIZE (GET_MODE (x)))
7119 case 1: size = "BYTE"; break;
7120 case 2: size = "WORD"; break;
7121 case 4: size = "DWORD"; break;
7122 case 8: size = "QWORD"; break;
7123 case 12: size = "XWORD"; break;
7124 case 16: size = "XMMWORD"; break;
7129 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7132 else if (code == 'w')
7134 else if (code == 'k')
7138 fputs (" PTR ", file);
7142 if (flag_pic && CONSTANT_ADDRESS_P (x))
7143 output_pic_addr_const (file, x, code);
7144 /* Avoid (%rip) for call operands. */
7145 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7146 && GET_CODE (x) != CONST_INT)
7147 output_addr_const (file, x);
7148 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7149 output_operand_lossage ("invalid constraints for operand");
7154 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7159 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7160 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7162 if (ASSEMBLER_DIALECT == ASM_ATT)
7164 fprintf (file, "0x%lx", l);
7167 /* These float cases don't actually occur as immediate operands. */
7168 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7172 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7173 fprintf (file, "%s", dstr);
7176 else if (GET_CODE (x) == CONST_DOUBLE
7177 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7181 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7182 fprintf (file, "%s", dstr);
7189 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7191 if (ASSEMBLER_DIALECT == ASM_ATT)
7194 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7195 || GET_CODE (x) == LABEL_REF)
7197 if (ASSEMBLER_DIALECT == ASM_ATT)
7200 fputs ("OFFSET FLAT:", file);
7203 if (GET_CODE (x) == CONST_INT)
7204 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7206 output_pic_addr_const (file, x, code);
7208 output_addr_const (file, x);
7212 /* Print a memory operand whose address is ADDR. */
7215 print_operand_address (file, addr)
7219 struct ix86_address parts;
7220 rtx base, index, disp;
7223 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7225 if (ASSEMBLER_DIALECT == ASM_INTEL)
7226 fputs ("DWORD PTR ", file);
7227 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7230 fputs ("fs:0", file);
7232 fputs ("gs:0", file);
7236 if (! ix86_decompose_address (addr, &parts))
7240 index = parts.index;
7242 scale = parts.scale;
7244 if (!base && !index)
7246 /* Displacement only requires special attention. */
7248 if (GET_CODE (disp) == CONST_INT)
7250 if (ASSEMBLER_DIALECT == ASM_INTEL)
7252 if (USER_LABEL_PREFIX[0] == 0)
7254 fputs ("ds:", file);
7256 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7259 output_pic_addr_const (file, addr, 0);
7261 output_addr_const (file, addr);
7263 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7265 && ((GET_CODE (addr) == SYMBOL_REF
7266 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7267 || GET_CODE (addr) == LABEL_REF
7268 || (GET_CODE (addr) == CONST
7269 && GET_CODE (XEXP (addr, 0)) == PLUS
7270 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7271 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7272 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7273 fputs ("(%rip)", file);
7277 if (ASSEMBLER_DIALECT == ASM_ATT)
7282 output_pic_addr_const (file, disp, 0);
7283 else if (GET_CODE (disp) == LABEL_REF)
7284 output_asm_label (disp);
7286 output_addr_const (file, disp);
7291 PRINT_REG (base, 0, file);
7295 PRINT_REG (index, 0, file);
7297 fprintf (file, ",%d", scale);
7303 rtx offset = NULL_RTX;
7307 /* Pull out the offset of a symbol; print any symbol itself. */
7308 if (GET_CODE (disp) == CONST
7309 && GET_CODE (XEXP (disp, 0)) == PLUS
7310 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7312 offset = XEXP (XEXP (disp, 0), 1);
7313 disp = gen_rtx_CONST (VOIDmode,
7314 XEXP (XEXP (disp, 0), 0));
7318 output_pic_addr_const (file, disp, 0);
7319 else if (GET_CODE (disp) == LABEL_REF)
7320 output_asm_label (disp);
7321 else if (GET_CODE (disp) == CONST_INT)
7324 output_addr_const (file, disp);
7330 PRINT_REG (base, 0, file);
7333 if (INTVAL (offset) >= 0)
7335 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7339 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7346 PRINT_REG (index, 0, file);
7348 fprintf (file, "*%d", scale);
7356 output_addr_const_extra (file, x)
7362 if (GET_CODE (x) != UNSPEC)
7365 op = XVECEXP (x, 0, 0);
7366 switch (XINT (x, 1))
7368 case UNSPEC_GOTTPOFF:
7369 output_addr_const (file, op);
7370 /* FIXME: This might be @TPOFF in Sun ld. */
7371 fputs ("@GOTTPOFF", file);
7374 output_addr_const (file, op);
7375 fputs ("@TPOFF", file);
7378 output_addr_const (file, op);
7380 fputs ("@TPOFF", file);
7382 fputs ("@NTPOFF", file);
7385 output_addr_const (file, op);
7386 fputs ("@DTPOFF", file);
7388 case UNSPEC_GOTNTPOFF:
7389 output_addr_const (file, op);
7391 fputs ("@GOTTPOFF(%rip)", file);
7393 fputs ("@GOTNTPOFF", file);
7395 case UNSPEC_INDNTPOFF:
7396 output_addr_const (file, op);
7397 fputs ("@INDNTPOFF", file);
7407 /* Split one or more DImode RTL references into pairs of SImode
7408 references. The RTL can be REG, offsettable MEM, integer constant, or
7409 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7410 split and "num" is its length. lo_half and hi_half are output arrays
7411 that parallel "operands". */
7414 split_di (operands, num, lo_half, hi_half)
7417 rtx lo_half[], hi_half[];
7421 rtx op = operands[num];
7423 /* simplify_subreg refuse to split volatile memory addresses,
7424 but we still have to handle it. */
7425 if (GET_CODE (op) == MEM)
7427 lo_half[num] = adjust_address (op, SImode, 0);
7428 hi_half[num] = adjust_address (op, SImode, 4);
7432 lo_half[num] = simplify_gen_subreg (SImode, op,
7433 GET_MODE (op) == VOIDmode
7434 ? DImode : GET_MODE (op), 0);
7435 hi_half[num] = simplify_gen_subreg (SImode, op,
7436 GET_MODE (op) == VOIDmode
7437 ? DImode : GET_MODE (op), 4);
7441 /* Split one or more TImode RTL references into pairs of SImode
7442 references. The RTL can be REG, offsettable MEM, integer constant, or
7443 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7444 split and "num" is its length. lo_half and hi_half are output arrays
7445 that parallel "operands". */
7448 split_ti (operands, num, lo_half, hi_half)
7451 rtx lo_half[], hi_half[];
7455 rtx op = operands[num];
7457 /* simplify_subreg refuse to split volatile memory addresses, but we
7458 still have to handle it. */
7459 if (GET_CODE (op) == MEM)
7461 lo_half[num] = adjust_address (op, DImode, 0);
7462 hi_half[num] = adjust_address (op, DImode, 8);
7466 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7467 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7472 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7473 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7474 is the expression of the binary operation. The output may either be
7475 emitted here, or returned to the caller, like all output_* functions.
7477 There is no guarantee that the operands are the same mode, as they
7478 might be within FLOAT or FLOAT_EXTEND expressions. */
7480 #ifndef SYSV386_COMPAT
7481 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7482 wants to fix the assemblers because that causes incompatibility
7483 with gcc. No-one wants to fix gcc because that causes
7484 incompatibility with assemblers... You can use the option of
7485 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7486 #define SYSV386_COMPAT 1
7490 output_387_binary_op (insn, operands)
7494 static char buf[30];
7497 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7499 #ifdef ENABLE_CHECKING
7500 /* Even if we do not want to check the inputs, this documents input
7501 constraints. Which helps in understanding the following code. */
7502 if (STACK_REG_P (operands[0])
7503 && ((REG_P (operands[1])
7504 && REGNO (operands[0]) == REGNO (operands[1])
7505 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7506 || (REG_P (operands[2])
7507 && REGNO (operands[0]) == REGNO (operands[2])
7508 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7509 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7515 switch (GET_CODE (operands[3]))
7518 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7519 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7527 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7528 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7536 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7537 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7545 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7546 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7560 if (GET_MODE (operands[0]) == SFmode)
7561 strcat (buf, "ss\t{%2, %0|%0, %2}");
7563 strcat (buf, "sd\t{%2, %0|%0, %2}");
7568 switch (GET_CODE (operands[3]))
7572 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7574 rtx temp = operands[2];
7575 operands[2] = operands[1];
7579 /* know operands[0] == operands[1]. */
7581 if (GET_CODE (operands[2]) == MEM)
7587 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7589 if (STACK_TOP_P (operands[0]))
7590 /* How is it that we are storing to a dead operand[2]?
7591 Well, presumably operands[1] is dead too. We can't
7592 store the result to st(0) as st(0) gets popped on this
7593 instruction. Instead store to operands[2] (which I
7594 think has to be st(1)). st(1) will be popped later.
7595 gcc <= 2.8.1 didn't have this check and generated
7596 assembly code that the Unixware assembler rejected. */
7597 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7599 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7603 if (STACK_TOP_P (operands[0]))
7604 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7606 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7611 if (GET_CODE (operands[1]) == MEM)
7617 if (GET_CODE (operands[2]) == MEM)
7623 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7626 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7627 derived assemblers, confusingly reverse the direction of
7628 the operation for fsub{r} and fdiv{r} when the
7629 destination register is not st(0). The Intel assembler
7630 doesn't have this brain damage. Read !SYSV386_COMPAT to
7631 figure out what the hardware really does. */
7632 if (STACK_TOP_P (operands[0]))
7633 p = "{p\t%0, %2|rp\t%2, %0}";
7635 p = "{rp\t%2, %0|p\t%0, %2}";
7637 if (STACK_TOP_P (operands[0]))
7638 /* As above for fmul/fadd, we can't store to st(0). */
7639 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7641 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7646 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7649 if (STACK_TOP_P (operands[0]))
7650 p = "{rp\t%0, %1|p\t%1, %0}";
7652 p = "{p\t%1, %0|rp\t%0, %1}";
7654 if (STACK_TOP_P (operands[0]))
7655 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7657 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7662 if (STACK_TOP_P (operands[0]))
7664 if (STACK_TOP_P (operands[1]))
7665 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7667 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7670 else if (STACK_TOP_P (operands[1]))
7673 p = "{\t%1, %0|r\t%0, %1}";
7675 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7681 p = "{r\t%2, %0|\t%0, %2}";
7683 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7696 /* Output code to initialize control word copies used by
7697 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7698 is set to control word rounding downwards. */
7700 emit_i387_cw_initialization (normal, round_down)
7701 rtx normal, round_down;
7703 rtx reg = gen_reg_rtx (HImode);
7705 emit_insn (gen_x86_fnstcw_1 (normal));
7706 emit_move_insn (reg, normal);
7707 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7709 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7711 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7712 emit_move_insn (round_down, reg);
7715 /* Output code for INSN to convert a float to a signed int. OPERANDS
7716 are the insn operands. The output may be [HSD]Imode and the input
7717 operand may be [SDX]Fmode. */
7720 output_fix_trunc (insn, operands)
7724 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7725 int dimode_p = GET_MODE (operands[0]) == DImode;
7727 /* Jump through a hoop or two for DImode, since the hardware has no
7728 non-popping instruction. We used to do this a different way, but
7729 that was somewhat fragile and broke with post-reload splitters. */
7730 if (dimode_p && !stack_top_dies)
7731 output_asm_insn ("fld\t%y1", operands);
7733 if (!STACK_TOP_P (operands[1]))
7736 if (GET_CODE (operands[0]) != MEM)
7739 output_asm_insn ("fldcw\t%3", operands);
7740 if (stack_top_dies || dimode_p)
7741 output_asm_insn ("fistp%z0\t%0", operands);
7743 output_asm_insn ("fist%z0\t%0", operands);
7744 output_asm_insn ("fldcw\t%2", operands);
7749 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7750 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7751 when fucom should be used. */
7754 output_fp_compare (insn, operands, eflags_p, unordered_p)
7757 int eflags_p, unordered_p;
7760 rtx cmp_op0 = operands[0];
7761 rtx cmp_op1 = operands[1];
7762 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7767 cmp_op1 = operands[2];
7771 if (GET_MODE (operands[0]) == SFmode)
7773 return "ucomiss\t{%1, %0|%0, %1}";
7775 return "comiss\t{%1, %0|%0, %y}";
7778 return "ucomisd\t{%1, %0|%0, %1}";
7780 return "comisd\t{%1, %0|%0, %y}";
7783 if (! STACK_TOP_P (cmp_op0))
7786 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7788 if (STACK_REG_P (cmp_op1)
7790 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7791 && REGNO (cmp_op1) != FIRST_STACK_REG)
7793 /* If both the top of the 387 stack dies, and the other operand
7794 is also a stack register that dies, then this must be a
7795 `fcompp' float compare */
7799 /* There is no double popping fcomi variant. Fortunately,
7800 eflags is immune from the fstp's cc clobbering. */
7802 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7804 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7812 return "fucompp\n\tfnstsw\t%0";
7814 return "fcompp\n\tfnstsw\t%0";
7827 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7829 static const char * const alt[24] =
7841 "fcomi\t{%y1, %0|%0, %y1}",
7842 "fcomip\t{%y1, %0|%0, %y1}",
7843 "fucomi\t{%y1, %0|%0, %y1}",
7844 "fucomip\t{%y1, %0|%0, %y1}",
7851 "fcom%z2\t%y2\n\tfnstsw\t%0",
7852 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7853 "fucom%z2\t%y2\n\tfnstsw\t%0",
7854 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7856 "ficom%z2\t%y2\n\tfnstsw\t%0",
7857 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7865 mask = eflags_p << 3;
7866 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7867 mask |= unordered_p << 1;
7868 mask |= stack_top_dies;
7881 ix86_output_addr_vec_elt (file, value)
7885 const char *directive = ASM_LONG;
7890 directive = ASM_QUAD;
7896 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7900 ix86_output_addr_diff_elt (file, value, rel)
7905 fprintf (file, "%s%s%d-%s%d\n",
7906 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7907 else if (HAVE_AS_GOTOFF_IN_DATA)
7908 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7910 else if (TARGET_MACHO)
7911 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7912 machopic_function_base_name () + 1);
7915 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7916 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7919 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7923 ix86_expand_clear (dest)
7928 /* We play register width games, which are only valid after reload. */
7929 if (!reload_completed)
7932 /* Avoid HImode and its attendant prefix byte. */
7933 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7934 dest = gen_rtx_REG (SImode, REGNO (dest));
7936 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7938 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7939 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7941 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7942 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7948 /* X is an unchanging MEM. If it is a constant pool reference, return
7949 the constant pool rtx, else NULL. */
7952 maybe_get_pool_constant (x)
7957 if (flag_pic && ! TARGET_64BIT)
7959 if (GET_CODE (x) != PLUS)
7961 if (XEXP (x, 0) != pic_offset_table_rtx)
7964 if (GET_CODE (x) != CONST)
7967 if (GET_CODE (x) != UNSPEC)
7969 if (XINT (x, 1) != UNSPEC_GOTOFF)
7971 x = XVECEXP (x, 0, 0);
7974 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7975 return get_pool_constant (x);
7981 ix86_expand_move (mode, operands)
7982 enum machine_mode mode;
7985 int strict = (reload_in_progress || reload_completed);
7986 rtx insn, op0, op1, tmp;
7991 if (tls_symbolic_operand (op1, Pmode))
7993 op1 = legitimize_address (op1, op1, VOIDmode);
7994 if (GET_CODE (op0) == MEM)
7996 tmp = gen_reg_rtx (mode);
7997 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8001 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8006 rtx temp = ((reload_in_progress
8007 || ((op0 && GET_CODE (op0) == REG)
8009 ? op0 : gen_reg_rtx (Pmode));
8010 op1 = machopic_indirect_data_reference (op1, temp);
8011 op1 = machopic_legitimize_pic_address (op1, mode,
8012 temp == op1 ? 0 : temp);
8016 if (MACHOPIC_INDIRECT)
8017 op1 = machopic_indirect_data_reference (op1, 0);
8021 insn = gen_rtx_SET (VOIDmode, op0, op1);
8025 #endif /* TARGET_MACHO */
8026 if (GET_CODE (op0) == MEM)
8027 op1 = force_reg (Pmode, op1);
8031 if (GET_CODE (temp) != REG)
8032 temp = gen_reg_rtx (Pmode);
8033 temp = legitimize_pic_address (op1, temp);
8041 if (GET_CODE (op0) == MEM
8042 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8043 || !push_operand (op0, mode))
8044 && GET_CODE (op1) == MEM)
8045 op1 = force_reg (mode, op1);
8047 if (push_operand (op0, mode)
8048 && ! general_no_elim_operand (op1, mode))
8049 op1 = copy_to_mode_reg (mode, op1);
8051 /* Force large constants in 64bit compilation into register
8052 to get them CSEed. */
8053 if (TARGET_64BIT && mode == DImode
8054 && immediate_operand (op1, mode)
8055 && !x86_64_zero_extended_value (op1)
8056 && !register_operand (op0, mode)
8057 && optimize && !reload_completed && !reload_in_progress)
8058 op1 = copy_to_mode_reg (mode, op1);
8060 if (FLOAT_MODE_P (mode))
8062 /* If we are loading a floating point constant to a register,
8063 force the value to memory now, since we'll get better code
8064 out the back end. */
8068 else if (GET_CODE (op1) == CONST_DOUBLE
8069 && register_operand (op0, mode))
8070 op1 = validize_mem (force_const_mem (mode, op1));
8074 insn = gen_rtx_SET (VOIDmode, op0, op1);
8080 ix86_expand_vector_move (mode, operands)
8081 enum machine_mode mode;
8084 /* Force constants other than zero into memory. We do not know how
8085 the instructions used to build constants modify the upper 64 bits
8086 of the register, once we have that information we may be able
8087 to handle some of them more efficiently. */
8088 if ((reload_in_progress | reload_completed) == 0
8089 && register_operand (operands[0], mode)
8090 && CONSTANT_P (operands[1]))
8091 operands[1] = force_const_mem (mode, operands[1]);
8093 /* Make operand1 a register if it isn't already. */
8095 && !register_operand (operands[0], mode)
8096 && !register_operand (operands[1], mode))
8098 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8099 emit_move_insn (operands[0], temp);
8103 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8106 /* Attempt to expand a binary operator. Make the expansion closer to the
8107 actual machine, then just general_operand, which will allow 3 separate
8108 memory references (one output, two input) in a single insn. */
8111 ix86_expand_binary_operator (code, mode, operands)
8113 enum machine_mode mode;
8116 int matching_memory;
8117 rtx src1, src2, dst, op, clob;
8123 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8124 if (GET_RTX_CLASS (code) == 'c'
8125 && (rtx_equal_p (dst, src2)
8126 || immediate_operand (src1, mode)))
8133 /* If the destination is memory, and we do not have matching source
8134 operands, do things in registers. */
8135 matching_memory = 0;
8136 if (GET_CODE (dst) == MEM)
8138 if (rtx_equal_p (dst, src1))
8139 matching_memory = 1;
8140 else if (GET_RTX_CLASS (code) == 'c'
8141 && rtx_equal_p (dst, src2))
8142 matching_memory = 2;
8144 dst = gen_reg_rtx (mode);
8147 /* Both source operands cannot be in memory. */
8148 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8150 if (matching_memory != 2)
8151 src2 = force_reg (mode, src2);
8153 src1 = force_reg (mode, src1);
8156 /* If the operation is not commutable, source 1 cannot be a constant
8157 or non-matching memory. */
8158 if ((CONSTANT_P (src1)
8159 || (!matching_memory && GET_CODE (src1) == MEM))
8160 && GET_RTX_CLASS (code) != 'c')
8161 src1 = force_reg (mode, src1);
8163 /* If optimizing, copy to regs to improve CSE */
8164 if (optimize && ! no_new_pseudos)
8166 if (GET_CODE (dst) == MEM)
8167 dst = gen_reg_rtx (mode);
8168 if (GET_CODE (src1) == MEM)
8169 src1 = force_reg (mode, src1);
8170 if (GET_CODE (src2) == MEM)
8171 src2 = force_reg (mode, src2);
8174 /* Emit the instruction. */
8176 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8177 if (reload_in_progress)
8179 /* Reload doesn't know about the flags register, and doesn't know that
8180 it doesn't want to clobber it. We can only do this with PLUS. */
8187 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8188 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8191 /* Fix up the destination if needed. */
8192 if (dst != operands[0])
8193 emit_move_insn (operands[0], dst);
8196 /* Return TRUE or FALSE depending on whether the binary operator meets the
8197 appropriate constraints. */
8200 ix86_binary_operator_ok (code, mode, operands)
8202 enum machine_mode mode ATTRIBUTE_UNUSED;
8205 /* Both source operands cannot be in memory. */
8206 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8208 /* If the operation is not commutable, source 1 cannot be a constant. */
8209 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8211 /* If the destination is memory, we must have a matching source operand. */
8212 if (GET_CODE (operands[0]) == MEM
8213 && ! (rtx_equal_p (operands[0], operands[1])
8214 || (GET_RTX_CLASS (code) == 'c'
8215 && rtx_equal_p (operands[0], operands[2]))))
8217 /* If the operation is not commutable and the source 1 is memory, we must
8218 have a matching destination. */
8219 if (GET_CODE (operands[1]) == MEM
8220 && GET_RTX_CLASS (code) != 'c'
8221 && ! rtx_equal_p (operands[0], operands[1]))
8226 /* Attempt to expand a unary operator. Make the expansion closer to the
8227 actual machine, then just general_operand, which will allow 2 separate
8228 memory references (one output, one input) in a single insn. */
8231 ix86_expand_unary_operator (code, mode, operands)
8233 enum machine_mode mode;
8236 int matching_memory;
8237 rtx src, dst, op, clob;
8242 /* If the destination is memory, and we do not have matching source
8243 operands, do things in registers. */
8244 matching_memory = 0;
8245 if (GET_CODE (dst) == MEM)
8247 if (rtx_equal_p (dst, src))
8248 matching_memory = 1;
8250 dst = gen_reg_rtx (mode);
8253 /* When source operand is memory, destination must match. */
8254 if (!matching_memory && GET_CODE (src) == MEM)
8255 src = force_reg (mode, src);
8257 /* If optimizing, copy to regs to improve CSE */
8258 if (optimize && ! no_new_pseudos)
8260 if (GET_CODE (dst) == MEM)
8261 dst = gen_reg_rtx (mode);
8262 if (GET_CODE (src) == MEM)
8263 src = force_reg (mode, src);
8266 /* Emit the instruction. */
8268 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8269 if (reload_in_progress || code == NOT)
8271 /* Reload doesn't know about the flags register, and doesn't know that
8272 it doesn't want to clobber it. */
8279 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8280 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8283 /* Fix up the destination if needed. */
8284 if (dst != operands[0])
8285 emit_move_insn (operands[0], dst);
8288 /* Return TRUE or FALSE depending on whether the unary operator meets the
8289 appropriate constraints. */
8292 ix86_unary_operator_ok (code, mode, operands)
8293 enum rtx_code code ATTRIBUTE_UNUSED;
8294 enum machine_mode mode ATTRIBUTE_UNUSED;
8295 rtx operands[2] ATTRIBUTE_UNUSED;
8297 /* If one of operands is memory, source and destination must match. */
8298 if ((GET_CODE (operands[0]) == MEM
8299 || GET_CODE (operands[1]) == MEM)
8300 && ! rtx_equal_p (operands[0], operands[1]))
8305 /* Return TRUE or FALSE depending on whether the first SET in INSN
8306 has source and destination with matching CC modes, and that the
8307 CC mode is at least as constrained as REQ_MODE. */
8310 ix86_match_ccmode (insn, req_mode)
8312 enum machine_mode req_mode;
8315 enum machine_mode set_mode;
8317 set = PATTERN (insn);
8318 if (GET_CODE (set) == PARALLEL)
8319 set = XVECEXP (set, 0, 0);
8320 if (GET_CODE (set) != SET)
8322 if (GET_CODE (SET_SRC (set)) != COMPARE)
8325 set_mode = GET_MODE (SET_DEST (set));
8329 if (req_mode != CCNOmode
8330 && (req_mode != CCmode
8331 || XEXP (SET_SRC (set), 1) != const0_rtx))
8335 if (req_mode == CCGCmode)
8339 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8343 if (req_mode == CCZmode)
8353 return (GET_MODE (SET_SRC (set)) == set_mode);
8356 /* Generate insn patterns to do an integer compare of OPERANDS. */
8359 ix86_expand_int_compare (code, op0, op1)
8363 enum machine_mode cmpmode;
8366 cmpmode = SELECT_CC_MODE (code, op0, op1);
8367 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8369 /* This is very simple, but making the interface the same as in the
8370 FP case makes the rest of the code easier. */
8371 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8372 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8374 /* Return the test that should be put into the flags user, i.e.
8375 the bcc, scc, or cmov instruction. */
8376 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8379 /* Figure out whether to use ordered or unordered fp comparisons.
8380 Return the appropriate mode to use. */
8383 ix86_fp_compare_mode (code)
8384 enum rtx_code code ATTRIBUTE_UNUSED;
8386 /* ??? In order to make all comparisons reversible, we do all comparisons
8387 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8388 all forms trapping and nontrapping comparisons, we can make inequality
8389 comparisons trapping again, since it results in better code when using
8390 FCOM based compares. */
8391 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8395 ix86_cc_mode (code, op0, op1)
8399 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8400 return ix86_fp_compare_mode (code);
8403 /* Only zero flag is needed. */
8405 case NE: /* ZF!=0 */
8407 /* Codes needing carry flag. */
8408 case GEU: /* CF=0 */
8409 case GTU: /* CF=0 & ZF=0 */
8410 case LTU: /* CF=1 */
8411 case LEU: /* CF=1 | ZF=1 */
8413 /* Codes possibly doable only with sign flag when
8414 comparing against zero. */
8415 case GE: /* SF=OF or SF=0 */
8416 case LT: /* SF<>OF or SF=1 */
8417 if (op1 == const0_rtx)
8420 /* For other cases Carry flag is not required. */
8422 /* Codes doable only with sign flag when comparing
8423 against zero, but we miss jump instruction for it
8424 so we need to use relational tests agains overflow
8425 that thus needs to be zero. */
8426 case GT: /* ZF=0 & SF=OF */
8427 case LE: /* ZF=1 | SF<>OF */
8428 if (op1 == const0_rtx)
8432 /* strcmp pattern do (use flags) and combine may ask us for proper
8441 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8444 ix86_use_fcomi_compare (code)
8445 enum rtx_code code ATTRIBUTE_UNUSED;
8447 enum rtx_code swapped_code = swap_condition (code);
8448 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8449 || (ix86_fp_comparison_cost (swapped_code)
8450 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8453 /* Swap, force into registers, or otherwise massage the two operands
8454 to a fp comparison. The operands are updated in place; the new
8455 comparison code is returned. */
8457 static enum rtx_code
8458 ix86_prepare_fp_compare_args (code, pop0, pop1)
8462 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8463 rtx op0 = *pop0, op1 = *pop1;
8464 enum machine_mode op_mode = GET_MODE (op0);
8465 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8467 /* All of the unordered compare instructions only work on registers.
8468 The same is true of the XFmode compare instructions. The same is
8469 true of the fcomi compare instructions. */
8472 && (fpcmp_mode == CCFPUmode
8473 || op_mode == XFmode
8474 || op_mode == TFmode
8475 || ix86_use_fcomi_compare (code)))
8477 op0 = force_reg (op_mode, op0);
8478 op1 = force_reg (op_mode, op1);
8482 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8483 things around if they appear profitable, otherwise force op0
8486 if (standard_80387_constant_p (op0) == 0
8487 || (GET_CODE (op0) == MEM
8488 && ! (standard_80387_constant_p (op1) == 0
8489 || GET_CODE (op1) == MEM)))
8492 tmp = op0, op0 = op1, op1 = tmp;
8493 code = swap_condition (code);
8496 if (GET_CODE (op0) != REG)
8497 op0 = force_reg (op_mode, op0);
8499 if (CONSTANT_P (op1))
8501 if (standard_80387_constant_p (op1))
8502 op1 = force_reg (op_mode, op1);
8504 op1 = validize_mem (force_const_mem (op_mode, op1));
8508 /* Try to rearrange the comparison to make it cheaper. */
8509 if (ix86_fp_comparison_cost (code)
8510 > ix86_fp_comparison_cost (swap_condition (code))
8511 && (GET_CODE (op1) == REG || !no_new_pseudos))
8514 tmp = op0, op0 = op1, op1 = tmp;
8515 code = swap_condition (code);
8516 if (GET_CODE (op0) != REG)
8517 op0 = force_reg (op_mode, op0);
8525 /* Convert comparison codes we use to represent FP comparison to integer
8526 code that will result in proper branch. Return UNKNOWN if no such code
8528 static enum rtx_code
8529 ix86_fp_compare_code_to_integer (code)
8559 /* Split comparison code CODE into comparisons we can do using branch
8560 instructions. BYPASS_CODE is comparison code for branch that will
8561 branch around FIRST_CODE and SECOND_CODE. If some of branches
8562 is not required, set value to NIL.
8563 We never require more than two branches. */
8565 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8566 enum rtx_code code, *bypass_code, *first_code, *second_code;
8572 /* The fcomi comparison sets flags as follows:
8582 case GT: /* GTU - CF=0 & ZF=0 */
8583 case GE: /* GEU - CF=0 */
8584 case ORDERED: /* PF=0 */
8585 case UNORDERED: /* PF=1 */
8586 case UNEQ: /* EQ - ZF=1 */
8587 case UNLT: /* LTU - CF=1 */
8588 case UNLE: /* LEU - CF=1 | ZF=1 */
8589 case LTGT: /* EQ - ZF=0 */
8591 case LT: /* LTU - CF=1 - fails on unordered */
8593 *bypass_code = UNORDERED;
8595 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8597 *bypass_code = UNORDERED;
8599 case EQ: /* EQ - ZF=1 - fails on unordered */
8601 *bypass_code = UNORDERED;
8603 case NE: /* NE - ZF=0 - fails on unordered */
8605 *second_code = UNORDERED;
8607 case UNGE: /* GEU - CF=0 - fails on unordered */
8609 *second_code = UNORDERED;
8611 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8613 *second_code = UNORDERED;
8618 if (!TARGET_IEEE_FP)
8625 /* Return cost of comparison done fcom + arithmetics operations on AX.
8626 All following functions do use number of instructions as a cost metrics.
8627 In future this should be tweaked to compute bytes for optimize_size and
8628 take into account performance of various instructions on various CPUs. */
8630 ix86_fp_comparison_arithmetics_cost (code)
8633 if (!TARGET_IEEE_FP)
8635 /* The cost of code output by ix86_expand_fp_compare. */
8663 /* Return cost of comparison done using fcomi operation.
8664 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8666 ix86_fp_comparison_fcomi_cost (code)
8669 enum rtx_code bypass_code, first_code, second_code;
8670 /* Return arbitrarily high cost when instruction is not supported - this
8671 prevents gcc from using it. */
8674 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8675 return (bypass_code != NIL || second_code != NIL) + 2;
8678 /* Return cost of comparison done using sahf operation.
8679 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8681 ix86_fp_comparison_sahf_cost (code)
8684 enum rtx_code bypass_code, first_code, second_code;
8685 /* Return arbitrarily high cost when instruction is not preferred - this
8686 avoids gcc from using it. */
8687 if (!TARGET_USE_SAHF && !optimize_size)
8689 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8690 return (bypass_code != NIL || second_code != NIL) + 3;
8693 /* Compute cost of the comparison done using any method.
8694 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8696 ix86_fp_comparison_cost (code)
8699 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8702 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8703 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8705 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8706 if (min > sahf_cost)
8708 if (min > fcomi_cost)
8713 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8716 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8718 rtx op0, op1, scratch;
8722 enum machine_mode fpcmp_mode, intcmp_mode;
8724 int cost = ix86_fp_comparison_cost (code);
8725 enum rtx_code bypass_code, first_code, second_code;
8727 fpcmp_mode = ix86_fp_compare_mode (code);
8728 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8731 *second_test = NULL_RTX;
8733 *bypass_test = NULL_RTX;
8735 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8737 /* Do fcomi/sahf based test when profitable. */
8738 if ((bypass_code == NIL || bypass_test)
8739 && (second_code == NIL || second_test)
8740 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8744 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8745 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8751 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8752 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8754 scratch = gen_reg_rtx (HImode);
8755 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8756 emit_insn (gen_x86_sahf_1 (scratch));
8759 /* The FP codes work out to act like unsigned. */
8760 intcmp_mode = fpcmp_mode;
8762 if (bypass_code != NIL)
8763 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8764 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8766 if (second_code != NIL)
8767 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8768 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8773 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8774 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8775 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8777 scratch = gen_reg_rtx (HImode);
8778 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8780 /* In the unordered case, we have to check C2 for NaN's, which
8781 doesn't happen to work out to anything nice combination-wise.
8782 So do some bit twiddling on the value we've got in AH to come
8783 up with an appropriate set of condition codes. */
8785 intcmp_mode = CCNOmode;
8790 if (code == GT || !TARGET_IEEE_FP)
8792 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8797 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8798 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8799 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8800 intcmp_mode = CCmode;
8806 if (code == LT && TARGET_IEEE_FP)
8808 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8809 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8810 intcmp_mode = CCmode;
8815 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8821 if (code == GE || !TARGET_IEEE_FP)
8823 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8828 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8829 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8836 if (code == LE && TARGET_IEEE_FP)
8838 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8839 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8840 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8841 intcmp_mode = CCmode;
8846 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8852 if (code == EQ && TARGET_IEEE_FP)
8854 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8855 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8856 intcmp_mode = CCmode;
8861 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8868 if (code == NE && TARGET_IEEE_FP)
8870 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8871 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8877 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8883 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8887 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8896 /* Return the test that should be put into the flags user, i.e.
8897 the bcc, scc, or cmov instruction. */
8898 return gen_rtx_fmt_ee (code, VOIDmode,
8899 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8904 ix86_expand_compare (code, second_test, bypass_test)
8906 rtx *second_test, *bypass_test;
8909 op0 = ix86_compare_op0;
8910 op1 = ix86_compare_op1;
8913 *second_test = NULL_RTX;
8915 *bypass_test = NULL_RTX;
8917 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8918 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8919 second_test, bypass_test);
8921 ret = ix86_expand_int_compare (code, op0, op1);
8926 /* Return true if the CODE will result in nontrivial jump sequence. */
8928 ix86_fp_jump_nontrivial_p (code)
8931 enum rtx_code bypass_code, first_code, second_code;
8934 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8935 return bypass_code != NIL || second_code != NIL;
8939 ix86_expand_branch (code, label)
8945 switch (GET_MODE (ix86_compare_op0))
8951 tmp = ix86_expand_compare (code, NULL, NULL);
8952 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8953 gen_rtx_LABEL_REF (VOIDmode, label),
8955 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8965 enum rtx_code bypass_code, first_code, second_code;
8967 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8970 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8972 /* Check whether we will use the natural sequence with one jump. If
8973 so, we can expand jump early. Otherwise delay expansion by
8974 creating compound insn to not confuse optimizers. */
8975 if (bypass_code == NIL && second_code == NIL
8978 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8979 gen_rtx_LABEL_REF (VOIDmode, label),
8984 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8985 ix86_compare_op0, ix86_compare_op1);
8986 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8987 gen_rtx_LABEL_REF (VOIDmode, label),
8989 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8991 use_fcomi = ix86_use_fcomi_compare (code);
8992 vec = rtvec_alloc (3 + !use_fcomi);
8993 RTVEC_ELT (vec, 0) = tmp;
8995 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8997 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9000 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9002 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9010 /* Expand DImode branch into multiple compare+branch. */
9012 rtx lo[2], hi[2], label2;
9013 enum rtx_code code1, code2, code3;
9015 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9017 tmp = ix86_compare_op0;
9018 ix86_compare_op0 = ix86_compare_op1;
9019 ix86_compare_op1 = tmp;
9020 code = swap_condition (code);
9022 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9023 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9025 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9026 avoid two branches. This costs one extra insn, so disable when
9027 optimizing for size. */
9029 if ((code == EQ || code == NE)
9031 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9036 if (hi[1] != const0_rtx)
9037 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9038 NULL_RTX, 0, OPTAB_WIDEN);
9041 if (lo[1] != const0_rtx)
9042 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9043 NULL_RTX, 0, OPTAB_WIDEN);
9045 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9046 NULL_RTX, 0, OPTAB_WIDEN);
9048 ix86_compare_op0 = tmp;
9049 ix86_compare_op1 = const0_rtx;
9050 ix86_expand_branch (code, label);
9054 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9055 op1 is a constant and the low word is zero, then we can just
9056 examine the high word. */
9058 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9061 case LT: case LTU: case GE: case GEU:
9062 ix86_compare_op0 = hi[0];
9063 ix86_compare_op1 = hi[1];
9064 ix86_expand_branch (code, label);
9070 /* Otherwise, we need two or three jumps. */
9072 label2 = gen_label_rtx ();
9075 code2 = swap_condition (code);
9076 code3 = unsigned_condition (code);
9080 case LT: case GT: case LTU: case GTU:
9083 case LE: code1 = LT; code2 = GT; break;
9084 case GE: code1 = GT; code2 = LT; break;
9085 case LEU: code1 = LTU; code2 = GTU; break;
9086 case GEU: code1 = GTU; code2 = LTU; break;
9088 case EQ: code1 = NIL; code2 = NE; break;
9089 case NE: code2 = NIL; break;
9097 * if (hi(a) < hi(b)) goto true;
9098 * if (hi(a) > hi(b)) goto false;
9099 * if (lo(a) < lo(b)) goto true;
9103 ix86_compare_op0 = hi[0];
9104 ix86_compare_op1 = hi[1];
9107 ix86_expand_branch (code1, label);
9109 ix86_expand_branch (code2, label2);
9111 ix86_compare_op0 = lo[0];
9112 ix86_compare_op1 = lo[1];
9113 ix86_expand_branch (code3, label);
9116 emit_label (label2);
9125 /* Split branch based on floating point condition. */
9127 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9129 rtx op1, op2, target1, target2, tmp;
9132 rtx label = NULL_RTX;
9134 int bypass_probability = -1, second_probability = -1, probability = -1;
9137 if (target2 != pc_rtx)
9140 code = reverse_condition_maybe_unordered (code);
9145 condition = ix86_expand_fp_compare (code, op1, op2,
9146 tmp, &second, &bypass);
9148 if (split_branch_probability >= 0)
9150 /* Distribute the probabilities across the jumps.
9151 Assume the BYPASS and SECOND to be always test
9153 probability = split_branch_probability;
9155 /* Value of 1 is low enough to make no need for probability
9156 to be updated. Later we may run some experiments and see
9157 if unordered values are more frequent in practice. */
9159 bypass_probability = 1;
9161 second_probability = 1;
9163 if (bypass != NULL_RTX)
9165 label = gen_label_rtx ();
9166 i = emit_jump_insn (gen_rtx_SET
9168 gen_rtx_IF_THEN_ELSE (VOIDmode,
9170 gen_rtx_LABEL_REF (VOIDmode,
9173 if (bypass_probability >= 0)
9175 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9176 GEN_INT (bypass_probability),
9179 i = emit_jump_insn (gen_rtx_SET
9181 gen_rtx_IF_THEN_ELSE (VOIDmode,
9182 condition, target1, target2)));
9183 if (probability >= 0)
9185 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9186 GEN_INT (probability),
9188 if (second != NULL_RTX)
9190 i = emit_jump_insn (gen_rtx_SET
9192 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9194 if (second_probability >= 0)
9196 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9197 GEN_INT (second_probability),
9200 if (label != NULL_RTX)
9205 ix86_expand_setcc (code, dest)
9209 rtx ret, tmp, tmpreg;
9210 rtx second_test, bypass_test;
9212 if (GET_MODE (ix86_compare_op0) == DImode
9214 return 0; /* FAIL */
9216 if (GET_MODE (dest) != QImode)
9219 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9220 PUT_MODE (ret, QImode);
9225 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9226 if (bypass_test || second_test)
9228 rtx test = second_test;
9230 rtx tmp2 = gen_reg_rtx (QImode);
9237 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9239 PUT_MODE (test, QImode);
9240 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9243 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9245 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9248 return 1; /* DONE */
9251 /* Expand comparison setting or clearing carry flag. Return true when successful
9252 and set pop for the operation. */
9254 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9258 enum machine_mode mode =
9259 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9261 /* Do not handle DImode compares that go trought special path. Also we can't
9262 deal with FP compares yet. This is possible to add. */
9263 if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
9271 /* Convert a==0 into (unsigned)a<1. */
9274 if (op1 != const0_rtx)
9277 code = (code == EQ ? LTU : GEU);
9280 /* Convert a>b into b<a or a>=b-1. */
9283 if (GET_CODE (op1) == CONST_INT)
9285 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9286 /* Bail out on overflow. We still can swap operands but that
9287 would force loading of the constant into register. */
9288 if (op1 == const0_rtx
9289 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9291 code = (code == GTU ? GEU : LTU);
9298 code = (code == GTU ? LTU : GEU);
9302 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9305 if (mode == DImode || op1 != const0_rtx)
9307 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9308 code = (code == LT ? GEU : LTU);
9312 if (mode == DImode || op1 != constm1_rtx)
9314 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9315 code = (code == LE ? GEU : LTU);
9321 ix86_compare_op0 = op0;
9322 ix86_compare_op1 = op1;
9323 *pop = ix86_expand_compare (code, NULL, NULL);
9324 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9330 ix86_expand_int_movcc (operands)
9333 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9334 rtx compare_seq, compare_op;
9335 rtx second_test, bypass_test;
9336 enum machine_mode mode = GET_MODE (operands[0]);
9337 bool sign_bit_compare_p = false;;
9340 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9341 compare_seq = get_insns ();
9344 compare_code = GET_CODE (compare_op);
9346 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9347 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9348 sign_bit_compare_p = true;
9350 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9351 HImode insns, we'd be swallowed in word prefix ops. */
9353 if ((mode != HImode || TARGET_FAST_PREFIX)
9354 && (mode != DImode || TARGET_64BIT)
9355 && GET_CODE (operands[2]) == CONST_INT
9356 && GET_CODE (operands[3]) == CONST_INT)
9358 rtx out = operands[0];
9359 HOST_WIDE_INT ct = INTVAL (operands[2]);
9360 HOST_WIDE_INT cf = INTVAL (operands[3]);
9364 /* Sign bit compares are better done using shifts than we do by using
9366 if (sign_bit_compare_p
9367 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9368 ix86_compare_op1, &compare_op))
9370 /* Detect overlap between destination and compare sources. */
9373 if (!sign_bit_compare_p)
9375 compare_code = GET_CODE (compare_op);
9377 /* To simplify rest of code, restrict to the GEU case. */
9378 if (compare_code == LTU)
9380 HOST_WIDE_INT tmp = ct;
9383 compare_code = reverse_condition (compare_code);
9384 code = reverse_condition (code);
9388 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9389 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9390 tmp = gen_reg_rtx (mode);
9393 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9395 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
9399 if (code == GT || code == GE)
9400 code = reverse_condition (code);
9403 HOST_WIDE_INT tmp = ct;
9407 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9408 ix86_compare_op1, VOIDmode, 0, -1);
9421 tmp = expand_simple_binop (mode, PLUS,
9423 copy_rtx (tmp), 1, OPTAB_DIRECT);
9434 tmp = expand_simple_binop (mode, IOR,
9436 copy_rtx (tmp), 1, OPTAB_DIRECT);
9438 else if (diff == -1 && ct)
9448 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9450 tmp = expand_simple_binop (mode, PLUS,
9451 copy_rtx (tmp), GEN_INT (cf),
9452 copy_rtx (tmp), 1, OPTAB_DIRECT);
9460 * andl cf - ct, dest
9470 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9473 tmp = expand_simple_binop (mode, AND,
9475 gen_int_mode (cf - ct, mode),
9476 copy_rtx (tmp), 1, OPTAB_DIRECT);
9478 tmp = expand_simple_binop (mode, PLUS,
9479 copy_rtx (tmp), GEN_INT (ct),
9480 copy_rtx (tmp), 1, OPTAB_DIRECT);
9483 if (!rtx_equal_p (tmp, out))
9484 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9486 return 1; /* DONE */
9492 tmp = ct, ct = cf, cf = tmp;
9494 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9496 /* We may be reversing unordered compare to normal compare, that
9497 is not valid in general (we may convert non-trapping condition
9498 to trapping one), however on i386 we currently emit all
9499 comparisons unordered. */
9500 compare_code = reverse_condition_maybe_unordered (compare_code);
9501 code = reverse_condition_maybe_unordered (code);
9505 compare_code = reverse_condition (compare_code);
9506 code = reverse_condition (code);
9511 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9512 && GET_CODE (ix86_compare_op1) == CONST_INT)
9514 if (ix86_compare_op1 == const0_rtx
9515 && (code == LT || code == GE))
9516 compare_code = code;
9517 else if (ix86_compare_op1 == constm1_rtx)
9521 else if (code == GT)
9526 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9527 if (compare_code != NIL
9528 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9529 && (cf == -1 || ct == -1))
9531 /* If lea code below could be used, only optimize
9532 if it results in a 2 insn sequence. */
9534 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9535 || diff == 3 || diff == 5 || diff == 9)
9536 || (compare_code == LT && ct == -1)
9537 || (compare_code == GE && cf == -1))
9540 * notl op1 (if necessary)
9548 code = reverse_condition (code);
9551 out = emit_store_flag (out, code, ix86_compare_op0,
9552 ix86_compare_op1, VOIDmode, 0, -1);
9554 out = expand_simple_binop (mode, IOR,
9556 out, 1, OPTAB_DIRECT);
9557 if (out != operands[0])
9558 emit_move_insn (operands[0], out);
9560 return 1; /* DONE */
9565 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9566 || diff == 3 || diff == 5 || diff == 9)
9567 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9568 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9574 * lea cf(dest*(ct-cf)),dest
9578 * This also catches the degenerate setcc-only case.
9584 out = emit_store_flag (out, code, ix86_compare_op0,
9585 ix86_compare_op1, VOIDmode, 0, 1);
9588 /* On x86_64 the lea instruction operates on Pmode, so we need
9589 to get arithmetics done in proper mode to match. */
9591 tmp = copy_rtx (out);
9595 out1 = copy_rtx (out);
9596 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9600 tmp = gen_rtx_PLUS (mode, tmp, out1);
9606 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9609 if (!rtx_equal_p (tmp, out))
9612 out = force_operand (tmp, out);
9614 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9616 if (!rtx_equal_p (out, operands[0]))
9617 emit_move_insn (operands[0], copy_rtx (out));
9619 return 1; /* DONE */
9623 * General case: Jumpful:
9624 * xorl dest,dest cmpl op1, op2
9625 * cmpl op1, op2 movl ct, dest
9627 * decl dest movl cf, dest
9628 * andl (cf-ct),dest 1:
9633 * This is reasonably steep, but branch mispredict costs are
9634 * high on modern cpus, so consider failing only if optimizing
9638 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9639 && BRANCH_COST >= 2)
9645 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9646 /* We may be reversing unordered compare to normal compare,
9647 that is not valid in general (we may convert non-trapping
9648 condition to trapping one), however on i386 we currently
9649 emit all comparisons unordered. */
9650 code = reverse_condition_maybe_unordered (code);
9653 code = reverse_condition (code);
9654 if (compare_code != NIL)
9655 compare_code = reverse_condition (compare_code);
9659 if (compare_code != NIL)
9661 /* notl op1 (if needed)
9666 For x < 0 (resp. x <= -1) there will be no notl,
9667 so if possible swap the constants to get rid of the
9669 True/false will be -1/0 while code below (store flag
9670 followed by decrement) is 0/-1, so the constants need
9671 to be exchanged once more. */
9673 if (compare_code == GE || !cf)
9675 code = reverse_condition (code);
9680 HOST_WIDE_INT tmp = cf;
9685 out = emit_store_flag (out, code, ix86_compare_op0,
9686 ix86_compare_op1, VOIDmode, 0, -1);
9690 out = emit_store_flag (out, code, ix86_compare_op0,
9691 ix86_compare_op1, VOIDmode, 0, 1);
9693 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9694 copy_rtx (out), 1, OPTAB_DIRECT);
9697 out = expand_simple_binop (mode, AND, copy_rtx (out),
9698 gen_int_mode (cf - ct, mode),
9699 copy_rtx (out), 1, OPTAB_DIRECT);
9701 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9702 copy_rtx (out), 1, OPTAB_DIRECT);
9703 if (!rtx_equal_p (out, operands[0]))
9704 emit_move_insn (operands[0], copy_rtx (out));
9706 return 1; /* DONE */
9710 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9712 /* Try a few things more with specific constants and a variable. */
9715 rtx var, orig_out, out, tmp;
9717 if (BRANCH_COST <= 2)
9718 return 0; /* FAIL */
9720 /* If one of the two operands is an interesting constant, load a
9721 constant with the above and mask it in with a logical operation. */
9723 if (GET_CODE (operands[2]) == CONST_INT)
9726 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9727 operands[3] = constm1_rtx, op = and_optab;
9728 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9729 operands[3] = const0_rtx, op = ior_optab;
9731 return 0; /* FAIL */
9733 else if (GET_CODE (operands[3]) == CONST_INT)
9736 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9737 operands[2] = constm1_rtx, op = and_optab;
9738 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9739 operands[2] = const0_rtx, op = ior_optab;
9741 return 0; /* FAIL */
9744 return 0; /* FAIL */
9746 orig_out = operands[0];
9747 tmp = gen_reg_rtx (mode);
9750 /* Recurse to get the constant loaded. */
9751 if (ix86_expand_int_movcc (operands) == 0)
9752 return 0; /* FAIL */
9754 /* Mask in the interesting variable. */
9755 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9757 if (!rtx_equal_p (out, orig_out))
9758 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9760 return 1; /* DONE */
9764 * For comparison with above,
9774 if (! nonimmediate_operand (operands[2], mode))
9775 operands[2] = force_reg (mode, operands[2]);
9776 if (! nonimmediate_operand (operands[3], mode))
9777 operands[3] = force_reg (mode, operands[3]);
9779 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9781 rtx tmp = gen_reg_rtx (mode);
9782 emit_move_insn (tmp, operands[3]);
9785 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9787 rtx tmp = gen_reg_rtx (mode);
9788 emit_move_insn (tmp, operands[2]);
9792 if (! register_operand (operands[2], VOIDmode)
9794 || ! register_operand (operands[3], VOIDmode)))
9795 operands[2] = force_reg (mode, operands[2]);
9798 && ! register_operand (operands[3], VOIDmode))
9799 operands[3] = force_reg (mode, operands[3]);
9801 emit_insn (compare_seq);
9802 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9803 gen_rtx_IF_THEN_ELSE (mode,
9804 compare_op, operands[2],
9807 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9808 gen_rtx_IF_THEN_ELSE (mode,
9810 copy_rtx (operands[3]),
9811 copy_rtx (operands[0]))));
9813 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9814 gen_rtx_IF_THEN_ELSE (mode,
9816 copy_rtx (operands[2]),
9817 copy_rtx (operands[0]))));
9819 return 1; /* DONE */
9823 ix86_expand_fp_movcc (operands)
9828 rtx compare_op, second_test, bypass_test;
9830 /* For SF/DFmode conditional moves based on comparisons
9831 in same mode, we may want to use SSE min/max instructions. */
9832 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9833 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9834 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9835 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9837 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9838 /* We may be called from the post-reload splitter. */
9839 && (!REG_P (operands[0])
9840 || SSE_REG_P (operands[0])
9841 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9843 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9844 code = GET_CODE (operands[1]);
9846 /* See if we have (cross) match between comparison operands and
9847 conditional move operands. */
9848 if (rtx_equal_p (operands[2], op1))
9853 code = reverse_condition_maybe_unordered (code);
9855 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9857 /* Check for min operation. */
9858 if (code == LT || code == UNLE)
9866 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9867 if (memory_operand (op0, VOIDmode))
9868 op0 = force_reg (GET_MODE (operands[0]), op0);
9869 if (GET_MODE (operands[0]) == SFmode)
9870 emit_insn (gen_minsf3 (operands[0], op0, op1));
9872 emit_insn (gen_mindf3 (operands[0], op0, op1));
9875 /* Check for max operation. */
9876 if (code == GT || code == UNGE)
9884 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9885 if (memory_operand (op0, VOIDmode))
9886 op0 = force_reg (GET_MODE (operands[0]), op0);
9887 if (GET_MODE (operands[0]) == SFmode)
9888 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9890 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9894 /* Manage condition to be sse_comparison_operator. In case we are
9895 in non-ieee mode, try to canonicalize the destination operand
9896 to be first in the comparison - this helps reload to avoid extra
9898 if (!sse_comparison_operator (operands[1], VOIDmode)
9899 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9901 rtx tmp = ix86_compare_op0;
9902 ix86_compare_op0 = ix86_compare_op1;
9903 ix86_compare_op1 = tmp;
9904 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9905 VOIDmode, ix86_compare_op0,
9908 /* Similarly try to manage result to be first operand of conditional
9909 move. We also don't support the NE comparison on SSE, so try to
9911 if ((rtx_equal_p (operands[0], operands[3])
9912 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9913 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9915 rtx tmp = operands[2];
9916 operands[2] = operands[3];
9918 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9919 (GET_CODE (operands[1])),
9920 VOIDmode, ix86_compare_op0,
9923 if (GET_MODE (operands[0]) == SFmode)
9924 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9925 operands[2], operands[3],
9926 ix86_compare_op0, ix86_compare_op1));
9928 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9929 operands[2], operands[3],
9930 ix86_compare_op0, ix86_compare_op1));
9934 /* The floating point conditional move instructions don't directly
9935 support conditions resulting from a signed integer comparison. */
9937 code = GET_CODE (operands[1]);
9938 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9940 /* The floating point conditional move instructions don't directly
9941 support signed integer comparisons. */
9943 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9945 if (second_test != NULL || bypass_test != NULL)
9947 tmp = gen_reg_rtx (QImode);
9948 ix86_expand_setcc (code, tmp);
9950 ix86_compare_op0 = tmp;
9951 ix86_compare_op1 = const0_rtx;
9952 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9954 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9956 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9957 emit_move_insn (tmp, operands[3]);
9960 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9962 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9963 emit_move_insn (tmp, operands[2]);
9967 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9968 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9973 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9974 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9979 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9980 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9988 /* Expand conditional increment or decrement using adb/sbb instructions.
9989 The default case using setcc followed by the conditional move can be
9990 done by generic code. */
9992 ix86_expand_int_addcc (operands)
9995 enum rtx_code code = GET_CODE (operands[1]);
9997 rtx val = const0_rtx;
9999 if (operands[3] != const1_rtx
10000 && operands[3] != constm1_rtx)
10002 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10003 ix86_compare_op1, &compare_op))
10005 if (GET_CODE (compare_op) != LTU)
10007 if ((GET_CODE (compare_op) == LTU) == (operands[3] == constm1_rtx))
10009 switch (GET_MODE (operands[0]))
10012 emit_insn (gen_subqi3_carry (operands[0], operands[2], val));
10015 emit_insn (gen_subhi3_carry (operands[0], operands[2], val));
10018 emit_insn (gen_subsi3_carry (operands[0], operands[2], val));
10021 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val));
10029 switch (GET_MODE (operands[0]))
10032 emit_insn (gen_addqi3_carry (operands[0], operands[2], val));
10035 emit_insn (gen_addhi3_carry (operands[0], operands[2], val));
10038 emit_insn (gen_addsi3_carry (operands[0], operands[2], val));
10041 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val));
10047 return 1; /* DONE */
10051 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10052 works for floating pointer parameters and nonoffsetable memories.
10053 For pushes, it returns just stack offsets; the values will be saved
10054 in the right order. Maximally three parts are generated. */
10057 ix86_split_to_parts (operand, parts, mode)
10060 enum machine_mode mode;
10065 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10067 size = (GET_MODE_SIZE (mode) + 4) / 8;
10069 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10071 if (size < 2 || size > 3)
10074 /* Optimize constant pool reference to immediates. This is used by fp
10075 moves, that force all constants to memory to allow combining. */
10076 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10078 rtx tmp = maybe_get_pool_constant (operand);
10083 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10085 /* The only non-offsetable memories we handle are pushes. */
10086 if (! push_operand (operand, VOIDmode))
10089 operand = copy_rtx (operand);
10090 PUT_MODE (operand, Pmode);
10091 parts[0] = parts[1] = parts[2] = operand;
10093 else if (!TARGET_64BIT)
10095 if (mode == DImode)
10096 split_di (&operand, 1, &parts[0], &parts[1]);
10099 if (REG_P (operand))
10101 if (!reload_completed)
10103 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10104 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10106 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10108 else if (offsettable_memref_p (operand))
10110 operand = adjust_address (operand, SImode, 0);
10111 parts[0] = operand;
10112 parts[1] = adjust_address (operand, SImode, 4);
10114 parts[2] = adjust_address (operand, SImode, 8);
10116 else if (GET_CODE (operand) == CONST_DOUBLE)
10121 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10126 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10127 parts[2] = gen_int_mode (l[2], SImode);
10130 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10135 parts[1] = gen_int_mode (l[1], SImode);
10136 parts[0] = gen_int_mode (l[0], SImode);
10144 if (mode == TImode)
10145 split_ti (&operand, 1, &parts[0], &parts[1]);
10146 if (mode == XFmode || mode == TFmode)
10148 if (REG_P (operand))
10150 if (!reload_completed)
10152 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10153 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10155 else if (offsettable_memref_p (operand))
10157 operand = adjust_address (operand, DImode, 0);
10158 parts[0] = operand;
10159 parts[1] = adjust_address (operand, SImode, 8);
10161 else if (GET_CODE (operand) == CONST_DOUBLE)
10166 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10167 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10168 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10169 if (HOST_BITS_PER_WIDE_INT >= 64)
10172 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10173 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10176 parts[0] = immed_double_const (l[0], l[1], DImode);
10177 parts[1] = gen_int_mode (l[2], SImode);
10187 /* Emit insns to perform a move or push of DI, DF, and XF values.
10188 Return false when normal moves are needed; true when all required
10189 insns have been emitted. Operands 2-4 contain the input values
10190 int the correct order; operands 5-7 contain the output values. */
10193 ix86_split_long_move (operands)
10199 int collisions = 0;
10200 enum machine_mode mode = GET_MODE (operands[0]);
10202 /* The DFmode expanders may ask us to move double.
10203 For 64bit target this is single move. By hiding the fact
10204 here we simplify i386.md splitters. */
10205 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10207 /* Optimize constant pool reference to immediates. This is used by
10208 fp moves, that force all constants to memory to allow combining. */
10210 if (GET_CODE (operands[1]) == MEM
10211 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10212 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10213 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10214 if (push_operand (operands[0], VOIDmode))
10216 operands[0] = copy_rtx (operands[0]);
10217 PUT_MODE (operands[0], Pmode);
10220 operands[0] = gen_lowpart (DImode, operands[0]);
10221 operands[1] = gen_lowpart (DImode, operands[1]);
10222 emit_move_insn (operands[0], operands[1]);
10226 /* The only non-offsettable memory we handle is push. */
10227 if (push_operand (operands[0], VOIDmode))
10229 else if (GET_CODE (operands[0]) == MEM
10230 && ! offsettable_memref_p (operands[0]))
10233 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10234 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10236 /* When emitting push, take care for source operands on the stack. */
10237 if (push && GET_CODE (operands[1]) == MEM
10238 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10241 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10242 XEXP (part[1][2], 0));
10243 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10244 XEXP (part[1][1], 0));
10247 /* We need to do copy in the right order in case an address register
10248 of the source overlaps the destination. */
10249 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10251 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10253 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10256 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10259 /* Collision in the middle part can be handled by reordering. */
10260 if (collisions == 1 && nparts == 3
10261 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10264 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10265 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10268 /* If there are more collisions, we can't handle it by reordering.
10269 Do an lea to the last part and use only one colliding move. */
10270 else if (collisions > 1)
10273 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10274 XEXP (part[1][0], 0)));
10275 part[1][0] = change_address (part[1][0],
10276 TARGET_64BIT ? DImode : SImode,
10277 part[0][nparts - 1]);
10278 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10280 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10290 /* We use only first 12 bytes of TFmode value, but for pushing we
10291 are required to adjust stack as if we were pushing real 16byte
10293 if (mode == TFmode && !TARGET_64BIT)
10294 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10296 emit_move_insn (part[0][2], part[1][2]);
10301 /* In 64bit mode we don't have 32bit push available. In case this is
10302 register, it is OK - we will just use larger counterpart. We also
10303 retype memory - these comes from attempt to avoid REX prefix on
10304 moving of second half of TFmode value. */
10305 if (GET_MODE (part[1][1]) == SImode)
10307 if (GET_CODE (part[1][1]) == MEM)
10308 part[1][1] = adjust_address (part[1][1], DImode, 0);
10309 else if (REG_P (part[1][1]))
10310 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10313 if (GET_MODE (part[1][0]) == SImode)
10314 part[1][0] = part[1][1];
10317 emit_move_insn (part[0][1], part[1][1]);
10318 emit_move_insn (part[0][0], part[1][0]);
10322 /* Choose correct order to not overwrite the source before it is copied. */
10323 if ((REG_P (part[0][0])
10324 && REG_P (part[1][1])
10325 && (REGNO (part[0][0]) == REGNO (part[1][1])
10327 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10329 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10333 operands[2] = part[0][2];
10334 operands[3] = part[0][1];
10335 operands[4] = part[0][0];
10336 operands[5] = part[1][2];
10337 operands[6] = part[1][1];
10338 operands[7] = part[1][0];
10342 operands[2] = part[0][1];
10343 operands[3] = part[0][0];
10344 operands[5] = part[1][1];
10345 operands[6] = part[1][0];
10352 operands[2] = part[0][0];
10353 operands[3] = part[0][1];
10354 operands[4] = part[0][2];
10355 operands[5] = part[1][0];
10356 operands[6] = part[1][1];
10357 operands[7] = part[1][2];
10361 operands[2] = part[0][0];
10362 operands[3] = part[0][1];
10363 operands[5] = part[1][0];
10364 operands[6] = part[1][1];
10367 emit_move_insn (operands[2], operands[5]);
10368 emit_move_insn (operands[3], operands[6]);
10370 emit_move_insn (operands[4], operands[7]);
10376 ix86_split_ashldi (operands, scratch)
10377 rtx *operands, scratch;
10379 rtx low[2], high[2];
10382 if (GET_CODE (operands[2]) == CONST_INT)
10384 split_di (operands, 2, low, high);
10385 count = INTVAL (operands[2]) & 63;
10389 emit_move_insn (high[0], low[1]);
10390 emit_move_insn (low[0], const0_rtx);
10393 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10397 if (!rtx_equal_p (operands[0], operands[1]))
10398 emit_move_insn (operands[0], operands[1]);
10399 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10400 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10405 if (!rtx_equal_p (operands[0], operands[1]))
10406 emit_move_insn (operands[0], operands[1]);
10408 split_di (operands, 1, low, high);
10410 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10411 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10413 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10415 if (! no_new_pseudos)
10416 scratch = force_reg (SImode, const0_rtx);
10418 emit_move_insn (scratch, const0_rtx);
10420 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10424 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10429 ix86_split_ashrdi (operands, scratch)
10430 rtx *operands, scratch;
10432 rtx low[2], high[2];
10435 if (GET_CODE (operands[2]) == CONST_INT)
10437 split_di (operands, 2, low, high);
10438 count = INTVAL (operands[2]) & 63;
10442 emit_move_insn (low[0], high[1]);
10444 if (! reload_completed)
10445 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10448 emit_move_insn (high[0], low[0]);
10449 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10453 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10457 if (!rtx_equal_p (operands[0], operands[1]))
10458 emit_move_insn (operands[0], operands[1]);
10459 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10460 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10465 if (!rtx_equal_p (operands[0], operands[1]))
10466 emit_move_insn (operands[0], operands[1]);
10468 split_di (operands, 1, low, high);
10470 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10471 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10473 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10475 if (! no_new_pseudos)
10476 scratch = gen_reg_rtx (SImode);
10477 emit_move_insn (scratch, high[0]);
10478 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10479 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10483 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10488 ix86_split_lshrdi (operands, scratch)
10489 rtx *operands, scratch;
10491 rtx low[2], high[2];
10494 if (GET_CODE (operands[2]) == CONST_INT)
10496 split_di (operands, 2, low, high);
10497 count = INTVAL (operands[2]) & 63;
10501 emit_move_insn (low[0], high[1]);
10502 emit_move_insn (high[0], const0_rtx);
10505 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10509 if (!rtx_equal_p (operands[0], operands[1]))
10510 emit_move_insn (operands[0], operands[1]);
10511 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10512 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10517 if (!rtx_equal_p (operands[0], operands[1]))
10518 emit_move_insn (operands[0], operands[1]);
10520 split_di (operands, 1, low, high);
10522 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10523 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10525 /* Heh. By reversing the arguments, we can reuse this pattern. */
10526 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10528 if (! no_new_pseudos)
10529 scratch = force_reg (SImode, const0_rtx);
10531 emit_move_insn (scratch, const0_rtx);
10533 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10537 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10541 /* Helper function for the string operations below. Dest VARIABLE whether
10542 it is aligned to VALUE bytes. If true, jump to the label. */
10544 ix86_expand_aligntest (variable, value)
10548 rtx label = gen_label_rtx ();
10549 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10550 if (GET_MODE (variable) == DImode)
10551 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10553 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10554 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10559 /* Adjust COUNTER by the VALUE. */
10561 ix86_adjust_counter (countreg, value)
10563 HOST_WIDE_INT value;
10565 if (GET_MODE (countreg) == DImode)
10566 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10568 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10571 /* Zero extend possibly SImode EXP to Pmode register. */
10573 ix86_zero_extend_to_Pmode (exp)
10577 if (GET_MODE (exp) == VOIDmode)
10578 return force_reg (Pmode, exp);
10579 if (GET_MODE (exp) == Pmode)
10580 return copy_to_mode_reg (Pmode, exp);
10581 r = gen_reg_rtx (Pmode);
10582 emit_insn (gen_zero_extendsidi2 (r, exp));
10586 /* Expand string move (memcpy) operation. Use i386 string operations when
10587 profitable. expand_clrstr contains similar code. */
10589 ix86_expand_movstr (dst, src, count_exp, align_exp)
10590 rtx dst, src, count_exp, align_exp;
10592 rtx srcreg, destreg, countreg;
10593 enum machine_mode counter_mode;
10594 HOST_WIDE_INT align = 0;
10595 unsigned HOST_WIDE_INT count = 0;
10600 if (GET_CODE (align_exp) == CONST_INT)
10601 align = INTVAL (align_exp);
10603 /* This simple hack avoids all inlining code and simplifies code below. */
10604 if (!TARGET_ALIGN_STRINGOPS)
10607 if (GET_CODE (count_exp) == CONST_INT)
10608 count = INTVAL (count_exp);
10610 /* Figure out proper mode for counter. For 32bits it is always SImode,
10611 for 64bits use SImode when possible, otherwise DImode.
10612 Set count to number of bytes copied when known at compile time. */
10613 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10614 || x86_64_zero_extended_value (count_exp))
10615 counter_mode = SImode;
10617 counter_mode = DImode;
10619 if (counter_mode != SImode && counter_mode != DImode)
10622 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10623 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10625 emit_insn (gen_cld ());
10627 /* When optimizing for size emit simple rep ; movsb instruction for
10628 counts not divisible by 4. */
10630 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10632 countreg = ix86_zero_extend_to_Pmode (count_exp);
10634 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10635 destreg, srcreg, countreg));
10637 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10638 destreg, srcreg, countreg));
10641 /* For constant aligned (or small unaligned) copies use rep movsl
10642 followed by code copying the rest. For PentiumPro ensure 8 byte
10643 alignment to allow rep movsl acceleration. */
10645 else if (count != 0
10647 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10648 || optimize_size || count < (unsigned int) 64))
10650 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10651 if (count & ~(size - 1))
10653 countreg = copy_to_mode_reg (counter_mode,
10654 GEN_INT ((count >> (size == 4 ? 2 : 3))
10655 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10656 countreg = ix86_zero_extend_to_Pmode (countreg);
10660 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10661 destreg, srcreg, countreg));
10663 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10664 destreg, srcreg, countreg));
10667 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10668 destreg, srcreg, countreg));
10670 if (size == 8 && (count & 0x04))
10671 emit_insn (gen_strmovsi (destreg, srcreg));
10673 emit_insn (gen_strmovhi (destreg, srcreg));
10675 emit_insn (gen_strmovqi (destreg, srcreg));
10677 /* The generic code based on the glibc implementation:
10678 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10679 allowing accelerated copying there)
10680 - copy the data using rep movsl
10681 - copy the rest. */
10686 int desired_alignment = (TARGET_PENTIUMPRO
10687 && (count == 0 || count >= (unsigned int) 260)
10688 ? 8 : UNITS_PER_WORD);
10690 /* In case we don't know anything about the alignment, default to
10691 library version, since it is usually equally fast and result in
10694 Also emit call when we know that the count is large and call overhead
10695 will not be important. */
10696 if (!TARGET_INLINE_ALL_STRINGOPS
10697 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10703 if (TARGET_SINGLE_STRINGOP)
10704 emit_insn (gen_cld ());
10706 countreg2 = gen_reg_rtx (Pmode);
10707 countreg = copy_to_mode_reg (counter_mode, count_exp);
10709 /* We don't use loops to align destination and to copy parts smaller
10710 than 4 bytes, because gcc is able to optimize such code better (in
10711 the case the destination or the count really is aligned, gcc is often
10712 able to predict the branches) and also it is friendlier to the
10713 hardware branch prediction.
10715 Using loops is beneficial for generic case, because we can
10716 handle small counts using the loops. Many CPUs (such as Athlon)
10717 have large REP prefix setup costs.
10719 This is quite costy. Maybe we can revisit this decision later or
10720 add some customizability to this code. */
10722 if (count == 0 && align < desired_alignment)
10724 label = gen_label_rtx ();
10725 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10726 LEU, 0, counter_mode, 1, label);
10730 rtx label = ix86_expand_aligntest (destreg, 1);
10731 emit_insn (gen_strmovqi (destreg, srcreg));
10732 ix86_adjust_counter (countreg, 1);
10733 emit_label (label);
10734 LABEL_NUSES (label) = 1;
10738 rtx label = ix86_expand_aligntest (destreg, 2);
10739 emit_insn (gen_strmovhi (destreg, srcreg));
10740 ix86_adjust_counter (countreg, 2);
10741 emit_label (label);
10742 LABEL_NUSES (label) = 1;
10744 if (align <= 4 && desired_alignment > 4)
10746 rtx label = ix86_expand_aligntest (destreg, 4);
10747 emit_insn (gen_strmovsi (destreg, srcreg));
10748 ix86_adjust_counter (countreg, 4);
10749 emit_label (label);
10750 LABEL_NUSES (label) = 1;
10753 if (label && desired_alignment > 4 && !TARGET_64BIT)
10755 emit_label (label);
10756 LABEL_NUSES (label) = 1;
10759 if (!TARGET_SINGLE_STRINGOP)
10760 emit_insn (gen_cld ());
10763 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10765 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10766 destreg, srcreg, countreg2));
10770 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10771 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10772 destreg, srcreg, countreg2));
10777 emit_label (label);
10778 LABEL_NUSES (label) = 1;
10780 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10781 emit_insn (gen_strmovsi (destreg, srcreg));
10782 if ((align <= 4 || count == 0) && TARGET_64BIT)
10784 rtx label = ix86_expand_aligntest (countreg, 4);
10785 emit_insn (gen_strmovsi (destreg, srcreg));
10786 emit_label (label);
10787 LABEL_NUSES (label) = 1;
10789 if (align > 2 && count != 0 && (count & 2))
10790 emit_insn (gen_strmovhi (destreg, srcreg));
10791 if (align <= 2 || count == 0)
10793 rtx label = ix86_expand_aligntest (countreg, 2);
10794 emit_insn (gen_strmovhi (destreg, srcreg));
10795 emit_label (label);
10796 LABEL_NUSES (label) = 1;
10798 if (align > 1 && count != 0 && (count & 1))
10799 emit_insn (gen_strmovqi (destreg, srcreg));
10800 if (align <= 1 || count == 0)
10802 rtx label = ix86_expand_aligntest (countreg, 1);
10803 emit_insn (gen_strmovqi (destreg, srcreg));
10804 emit_label (label);
10805 LABEL_NUSES (label) = 1;
10809 insns = get_insns ();
10812 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10817 /* Expand string clear operation (bzero). Use i386 string operations when
10818 profitable. expand_movstr contains similar code. */
10820 ix86_expand_clrstr (src, count_exp, align_exp)
10821 rtx src, count_exp, align_exp;
10823 rtx destreg, zeroreg, countreg;
10824 enum machine_mode counter_mode;
10825 HOST_WIDE_INT align = 0;
10826 unsigned HOST_WIDE_INT count = 0;
10828 if (GET_CODE (align_exp) == CONST_INT)
10829 align = INTVAL (align_exp);
10831 /* This simple hack avoids all inlining code and simplifies code below. */
10832 if (!TARGET_ALIGN_STRINGOPS)
10835 if (GET_CODE (count_exp) == CONST_INT)
10836 count = INTVAL (count_exp);
10837 /* Figure out proper mode for counter. For 32bits it is always SImode,
10838 for 64bits use SImode when possible, otherwise DImode.
10839 Set count to number of bytes copied when known at compile time. */
10840 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10841 || x86_64_zero_extended_value (count_exp))
10842 counter_mode = SImode;
10844 counter_mode = DImode;
10846 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10848 emit_insn (gen_cld ());
10850 /* When optimizing for size emit simple rep ; movsb instruction for
10851 counts not divisible by 4. */
10853 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10855 countreg = ix86_zero_extend_to_Pmode (count_exp);
10856 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10858 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10859 destreg, countreg));
10861 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10862 destreg, countreg));
10864 else if (count != 0
10866 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10867 || optimize_size || count < (unsigned int) 64))
10869 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10870 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10871 if (count & ~(size - 1))
10873 countreg = copy_to_mode_reg (counter_mode,
10874 GEN_INT ((count >> (size == 4 ? 2 : 3))
10875 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10876 countreg = ix86_zero_extend_to_Pmode (countreg);
10880 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10881 destreg, countreg));
10883 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10884 destreg, countreg));
10887 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10888 destreg, countreg));
10890 if (size == 8 && (count & 0x04))
10891 emit_insn (gen_strsetsi (destreg,
10892 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10894 emit_insn (gen_strsethi (destreg,
10895 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10897 emit_insn (gen_strsetqi (destreg,
10898 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10904 /* Compute desired alignment of the string operation. */
10905 int desired_alignment = (TARGET_PENTIUMPRO
10906 && (count == 0 || count >= (unsigned int) 260)
10907 ? 8 : UNITS_PER_WORD);
10909 /* In case we don't know anything about the alignment, default to
10910 library version, since it is usually equally fast and result in
10913 Also emit call when we know that the count is large and call overhead
10914 will not be important. */
10915 if (!TARGET_INLINE_ALL_STRINGOPS
10916 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10919 if (TARGET_SINGLE_STRINGOP)
10920 emit_insn (gen_cld ());
10922 countreg2 = gen_reg_rtx (Pmode);
10923 countreg = copy_to_mode_reg (counter_mode, count_exp);
10924 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10926 if (count == 0 && align < desired_alignment)
10928 label = gen_label_rtx ();
10929 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10930 LEU, 0, counter_mode, 1, label);
10934 rtx label = ix86_expand_aligntest (destreg, 1);
10935 emit_insn (gen_strsetqi (destreg,
10936 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10937 ix86_adjust_counter (countreg, 1);
10938 emit_label (label);
10939 LABEL_NUSES (label) = 1;
10943 rtx label = ix86_expand_aligntest (destreg, 2);
10944 emit_insn (gen_strsethi (destreg,
10945 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10946 ix86_adjust_counter (countreg, 2);
10947 emit_label (label);
10948 LABEL_NUSES (label) = 1;
10950 if (align <= 4 && desired_alignment > 4)
10952 rtx label = ix86_expand_aligntest (destreg, 4);
10953 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10954 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10956 ix86_adjust_counter (countreg, 4);
10957 emit_label (label);
10958 LABEL_NUSES (label) = 1;
10961 if (label && desired_alignment > 4 && !TARGET_64BIT)
10963 emit_label (label);
10964 LABEL_NUSES (label) = 1;
10968 if (!TARGET_SINGLE_STRINGOP)
10969 emit_insn (gen_cld ());
10972 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10974 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10975 destreg, countreg2));
10979 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10980 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10981 destreg, countreg2));
10985 emit_label (label);
10986 LABEL_NUSES (label) = 1;
10989 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10990 emit_insn (gen_strsetsi (destreg,
10991 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10992 if (TARGET_64BIT && (align <= 4 || count == 0))
10994 rtx label = ix86_expand_aligntest (countreg, 4);
10995 emit_insn (gen_strsetsi (destreg,
10996 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10997 emit_label (label);
10998 LABEL_NUSES (label) = 1;
11000 if (align > 2 && count != 0 && (count & 2))
11001 emit_insn (gen_strsethi (destreg,
11002 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11003 if (align <= 2 || count == 0)
11005 rtx label = ix86_expand_aligntest (countreg, 2);
11006 emit_insn (gen_strsethi (destreg,
11007 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11008 emit_label (label);
11009 LABEL_NUSES (label) = 1;
11011 if (align > 1 && count != 0 && (count & 1))
11012 emit_insn (gen_strsetqi (destreg,
11013 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11014 if (align <= 1 || count == 0)
11016 rtx label = ix86_expand_aligntest (countreg, 1);
11017 emit_insn (gen_strsetqi (destreg,
11018 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11019 emit_label (label);
11020 LABEL_NUSES (label) = 1;
11025 /* Expand strlen. */
11027 ix86_expand_strlen (out, src, eoschar, align)
11028 rtx out, src, eoschar, align;
11030 rtx addr, scratch1, scratch2, scratch3, scratch4;
11032 /* The generic case of strlen expander is long. Avoid it's
11033 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11035 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11036 && !TARGET_INLINE_ALL_STRINGOPS
11038 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11041 addr = force_reg (Pmode, XEXP (src, 0));
11042 scratch1 = gen_reg_rtx (Pmode);
11044 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11047 /* Well it seems that some optimizer does not combine a call like
11048 foo(strlen(bar), strlen(bar));
11049 when the move and the subtraction is done here. It does calculate
11050 the length just once when these instructions are done inside of
11051 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11052 often used and I use one fewer register for the lifetime of
11053 output_strlen_unroll() this is better. */
11055 emit_move_insn (out, addr);
11057 ix86_expand_strlensi_unroll_1 (out, align);
11059 /* strlensi_unroll_1 returns the address of the zero at the end of
11060 the string, like memchr(), so compute the length by subtracting
11061 the start address. */
11063 emit_insn (gen_subdi3 (out, out, addr));
11065 emit_insn (gen_subsi3 (out, out, addr));
11069 scratch2 = gen_reg_rtx (Pmode);
11070 scratch3 = gen_reg_rtx (Pmode);
11071 scratch4 = force_reg (Pmode, constm1_rtx);
11073 emit_move_insn (scratch3, addr);
11074 eoschar = force_reg (QImode, eoschar);
11076 emit_insn (gen_cld ());
11079 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11080 align, scratch4, scratch3));
11081 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11082 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11086 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11087 align, scratch4, scratch3));
11088 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11089 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11095 /* Expand the appropriate insns for doing strlen if not just doing
11098 out = result, initialized with the start address
11099 align_rtx = alignment of the address.
11100 scratch = scratch register, initialized with the startaddress when
11101 not aligned, otherwise undefined
11103 This is just the body. It needs the initialisations mentioned above and
11104 some address computing at the end. These things are done in i386.md. */
11107 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11108 rtx out, align_rtx;
11112 rtx align_2_label = NULL_RTX;
11113 rtx align_3_label = NULL_RTX;
11114 rtx align_4_label = gen_label_rtx ();
11115 rtx end_0_label = gen_label_rtx ();
11117 rtx tmpreg = gen_reg_rtx (SImode);
11118 rtx scratch = gen_reg_rtx (SImode);
11121 if (GET_CODE (align_rtx) == CONST_INT)
11122 align = INTVAL (align_rtx);
11124 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11126 /* Is there a known alignment and is it less than 4? */
11129 rtx scratch1 = gen_reg_rtx (Pmode);
11130 emit_move_insn (scratch1, out);
11131 /* Is there a known alignment and is it not 2? */
11134 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11135 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11137 /* Leave just the 3 lower bits. */
11138 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11139 NULL_RTX, 0, OPTAB_WIDEN);
11141 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11142 Pmode, 1, align_4_label);
11143 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11144 Pmode, 1, align_2_label);
11145 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11146 Pmode, 1, align_3_label);
11150 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11151 check if is aligned to 4 - byte. */
11153 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11154 NULL_RTX, 0, OPTAB_WIDEN);
11156 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11157 Pmode, 1, align_4_label);
11160 mem = gen_rtx_MEM (QImode, out);
11162 /* Now compare the bytes. */
11164 /* Compare the first n unaligned byte on a byte per byte basis. */
11165 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11166 QImode, 1, end_0_label);
11168 /* Increment the address. */
11170 emit_insn (gen_adddi3 (out, out, const1_rtx));
11172 emit_insn (gen_addsi3 (out, out, const1_rtx));
11174 /* Not needed with an alignment of 2 */
11177 emit_label (align_2_label);
11179 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11183 emit_insn (gen_adddi3 (out, out, const1_rtx));
11185 emit_insn (gen_addsi3 (out, out, const1_rtx));
11187 emit_label (align_3_label);
11190 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11194 emit_insn (gen_adddi3 (out, out, const1_rtx));
11196 emit_insn (gen_addsi3 (out, out, const1_rtx));
11199 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11200 align this loop. It gives only huge programs, but does not help to
11202 emit_label (align_4_label);
11204 mem = gen_rtx_MEM (SImode, out);
11205 emit_move_insn (scratch, mem);
11207 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11209 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11211 /* This formula yields a nonzero result iff one of the bytes is zero.
11212 This saves three branches inside loop and many cycles. */
11214 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11215 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11216 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11217 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11218 gen_int_mode (0x80808080, SImode)));
11219 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11224 rtx reg = gen_reg_rtx (SImode);
11225 rtx reg2 = gen_reg_rtx (Pmode);
11226 emit_move_insn (reg, tmpreg);
11227 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11229 /* If zero is not in the first two bytes, move two bytes forward. */
11230 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11231 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11232 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11233 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11234 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11237 /* Emit lea manually to avoid clobbering of flags. */
11238 emit_insn (gen_rtx_SET (SImode, reg2,
11239 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11241 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11242 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11243 emit_insn (gen_rtx_SET (VOIDmode, out,
11244 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11251 rtx end_2_label = gen_label_rtx ();
11252 /* Is zero in the first two bytes? */
11254 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11255 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11256 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11257 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11258 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11260 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11261 JUMP_LABEL (tmp) = end_2_label;
11263 /* Not in the first two. Move two bytes forward. */
11264 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11266 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11268 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11270 emit_label (end_2_label);
11274 /* Avoid branch in fixing the byte. */
11275 tmpreg = gen_lowpart (QImode, tmpreg);
11276 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11278 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11280 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11282 emit_label (end_0_label);
11286 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11287 rtx retval, fnaddr, callarg1, callarg2, pop;
11290 rtx use = NULL, call;
11292 if (pop == const0_rtx)
11294 if (TARGET_64BIT && pop)
11298 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11299 fnaddr = machopic_indirect_call_target (fnaddr);
11301 /* Static functions and indirect calls don't need the pic register. */
11302 if (! TARGET_64BIT && flag_pic
11303 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11304 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11305 use_reg (&use, pic_offset_table_rtx);
11307 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11309 rtx al = gen_rtx_REG (QImode, 0);
11310 emit_move_insn (al, callarg2);
11311 use_reg (&use, al);
11313 #endif /* TARGET_MACHO */
11315 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11317 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11318 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11320 if (sibcall && TARGET_64BIT
11321 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11324 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11325 fnaddr = gen_rtx_REG (Pmode, 40);
11326 emit_move_insn (fnaddr, addr);
11327 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11330 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11332 call = gen_rtx_SET (VOIDmode, retval, call);
11335 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11336 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11337 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11340 call = emit_call_insn (call);
11342 CALL_INSN_FUNCTION_USAGE (call) = use;
11346 /* Clear stack slot assignments remembered from previous functions.
11347 This is called from INIT_EXPANDERS once before RTL is emitted for each
11350 static struct machine_function *
11351 ix86_init_machine_status ()
11353 return ggc_alloc_cleared (sizeof (struct machine_function));
11356 /* Return a MEM corresponding to a stack slot with mode MODE.
11357 Allocate a new slot if necessary.
11359 The RTL for a function can have several slots available: N is
11360 which slot to use. */
11363 assign_386_stack_local (mode, n)
11364 enum machine_mode mode;
11367 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11370 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11371 ix86_stack_locals[(int) mode][n]
11372 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11374 return ix86_stack_locals[(int) mode][n];
11377 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11379 static GTY(()) rtx ix86_tls_symbol;
11381 ix86_tls_get_addr ()
11384 if (!ix86_tls_symbol)
11386 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11387 (TARGET_GNU_TLS && !TARGET_64BIT)
11388 ? "___tls_get_addr"
11389 : "__tls_get_addr");
11392 return ix86_tls_symbol;
11395 /* Calculate the length of the memory address in the instruction
11396 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11399 memory_address_length (addr)
11402 struct ix86_address parts;
11403 rtx base, index, disp;
11406 if (GET_CODE (addr) == PRE_DEC
11407 || GET_CODE (addr) == POST_INC
11408 || GET_CODE (addr) == PRE_MODIFY
11409 || GET_CODE (addr) == POST_MODIFY)
11412 if (! ix86_decompose_address (addr, &parts))
11416 index = parts.index;
11420 /* Register Indirect. */
11421 if (base && !index && !disp)
11423 /* Special cases: ebp and esp need the two-byte modrm form. */
11424 if (addr == stack_pointer_rtx
11425 || addr == arg_pointer_rtx
11426 || addr == frame_pointer_rtx
11427 || addr == hard_frame_pointer_rtx)
11431 /* Direct Addressing. */
11432 else if (disp && !base && !index)
11437 /* Find the length of the displacement constant. */
11440 if (GET_CODE (disp) == CONST_INT
11441 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11447 /* An index requires the two-byte modrm form. */
11455 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11456 is set, expect that insn have 8bit immediate alternative. */
11458 ix86_attr_length_immediate_default (insn, shortform)
11464 extract_insn_cached (insn);
11465 for (i = recog_data.n_operands - 1; i >= 0; --i)
11466 if (CONSTANT_P (recog_data.operand[i]))
11471 && GET_CODE (recog_data.operand[i]) == CONST_INT
11472 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11476 switch (get_attr_mode (insn))
11487 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11492 fatal_insn ("unknown insn mode", insn);
11498 /* Compute default value for "length_address" attribute. */
11500 ix86_attr_length_address_default (insn)
11504 extract_insn_cached (insn);
11505 for (i = recog_data.n_operands - 1; i >= 0; --i)
11506 if (GET_CODE (recog_data.operand[i]) == MEM)
11508 return memory_address_length (XEXP (recog_data.operand[i], 0));
11514 /* Return the maximum number of instructions a cpu can issue. */
11521 case PROCESSOR_PENTIUM:
11525 case PROCESSOR_PENTIUMPRO:
11526 case PROCESSOR_PENTIUM4:
11527 case PROCESSOR_ATHLON:
11536 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11537 by DEP_INSN and nothing set by DEP_INSN. */
11540 ix86_flags_dependant (insn, dep_insn, insn_type)
11541 rtx insn, dep_insn;
11542 enum attr_type insn_type;
11546 /* Simplify the test for uninteresting insns. */
11547 if (insn_type != TYPE_SETCC
11548 && insn_type != TYPE_ICMOV
11549 && insn_type != TYPE_FCMOV
11550 && insn_type != TYPE_IBR)
11553 if ((set = single_set (dep_insn)) != 0)
11555 set = SET_DEST (set);
11558 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11559 && XVECLEN (PATTERN (dep_insn), 0) == 2
11560 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11561 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11563 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11564 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11569 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11572 /* This test is true if the dependent insn reads the flags but
11573 not any other potentially set register. */
11574 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11577 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11583 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11584 address with operands set by DEP_INSN. */
11587 ix86_agi_dependant (insn, dep_insn, insn_type)
11588 rtx insn, dep_insn;
11589 enum attr_type insn_type;
11593 if (insn_type == TYPE_LEA
11596 addr = PATTERN (insn);
11597 if (GET_CODE (addr) == SET)
11599 else if (GET_CODE (addr) == PARALLEL
11600 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11601 addr = XVECEXP (addr, 0, 0);
11604 addr = SET_SRC (addr);
11609 extract_insn_cached (insn);
11610 for (i = recog_data.n_operands - 1; i >= 0; --i)
11611 if (GET_CODE (recog_data.operand[i]) == MEM)
11613 addr = XEXP (recog_data.operand[i], 0);
11620 return modified_in_p (addr, dep_insn);
11624 ix86_adjust_cost (insn, link, dep_insn, cost)
11625 rtx insn, link, dep_insn;
11628 enum attr_type insn_type, dep_insn_type;
11629 enum attr_memory memory, dep_memory;
11631 int dep_insn_code_number;
11633 /* Anti and output dependencies have zero cost on all CPUs. */
11634 if (REG_NOTE_KIND (link) != 0)
11637 dep_insn_code_number = recog_memoized (dep_insn);
11639 /* If we can't recognize the insns, we can't really do anything. */
11640 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11643 insn_type = get_attr_type (insn);
11644 dep_insn_type = get_attr_type (dep_insn);
11648 case PROCESSOR_PENTIUM:
11649 /* Address Generation Interlock adds a cycle of latency. */
11650 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11653 /* ??? Compares pair with jump/setcc. */
11654 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11657 /* Floating point stores require value to be ready one cycle earlier. */
11658 if (insn_type == TYPE_FMOV
11659 && get_attr_memory (insn) == MEMORY_STORE
11660 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11664 case PROCESSOR_PENTIUMPRO:
11665 memory = get_attr_memory (insn);
11666 dep_memory = get_attr_memory (dep_insn);
11668 /* Since we can't represent delayed latencies of load+operation,
11669 increase the cost here for non-imov insns. */
11670 if (dep_insn_type != TYPE_IMOV
11671 && dep_insn_type != TYPE_FMOV
11672 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11675 /* INT->FP conversion is expensive. */
11676 if (get_attr_fp_int_src (dep_insn))
11679 /* There is one cycle extra latency between an FP op and a store. */
11680 if (insn_type == TYPE_FMOV
11681 && (set = single_set (dep_insn)) != NULL_RTX
11682 && (set2 = single_set (insn)) != NULL_RTX
11683 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11684 && GET_CODE (SET_DEST (set2)) == MEM)
11687 /* Show ability of reorder buffer to hide latency of load by executing
11688 in parallel with previous instruction in case
11689 previous instruction is not needed to compute the address. */
11690 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11691 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11693 /* Claim moves to take one cycle, as core can issue one load
11694 at time and the next load can start cycle later. */
11695 if (dep_insn_type == TYPE_IMOV
11696 || dep_insn_type == TYPE_FMOV)
11704 memory = get_attr_memory (insn);
11705 dep_memory = get_attr_memory (dep_insn);
11706 /* The esp dependency is resolved before the instruction is really
11708 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11709 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11712 /* Since we can't represent delayed latencies of load+operation,
11713 increase the cost here for non-imov insns. */
11714 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11715 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11717 /* INT->FP conversion is expensive. */
11718 if (get_attr_fp_int_src (dep_insn))
11721 /* Show ability of reorder buffer to hide latency of load by executing
11722 in parallel with previous instruction in case
11723 previous instruction is not needed to compute the address. */
11724 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11725 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11727 /* Claim moves to take one cycle, as core can issue one load
11728 at time and the next load can start cycle later. */
11729 if (dep_insn_type == TYPE_IMOV
11730 || dep_insn_type == TYPE_FMOV)
11739 case PROCESSOR_ATHLON:
11741 memory = get_attr_memory (insn);
11742 dep_memory = get_attr_memory (dep_insn);
11744 /* Show ability of reorder buffer to hide latency of load by executing
11745 in parallel with previous instruction in case
11746 previous instruction is not needed to compute the address. */
11747 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11748 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11750 /* Claim moves to take one cycle, as core can issue one load
11751 at time and the next load can start cycle later. */
11752 if (dep_insn_type == TYPE_IMOV
11753 || dep_insn_type == TYPE_FMOV)
11755 else if (cost >= 3)
11770 struct ppro_sched_data
11773 int issued_this_cycle;
11777 static enum attr_ppro_uops
11778 ix86_safe_ppro_uops (insn)
11781 if (recog_memoized (insn) >= 0)
11782 return get_attr_ppro_uops (insn);
11784 return PPRO_UOPS_MANY;
11788 ix86_dump_ppro_packet (dump)
11791 if (ix86_sched_data.ppro.decode[0])
11793 fprintf (dump, "PPRO packet: %d",
11794 INSN_UID (ix86_sched_data.ppro.decode[0]));
11795 if (ix86_sched_data.ppro.decode[1])
11796 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11797 if (ix86_sched_data.ppro.decode[2])
11798 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11799 fputc ('\n', dump);
11803 /* We're beginning a new block. Initialize data structures as necessary. */
11806 ix86_sched_init (dump, sched_verbose, veclen)
11807 FILE *dump ATTRIBUTE_UNUSED;
11808 int sched_verbose ATTRIBUTE_UNUSED;
11809 int veclen ATTRIBUTE_UNUSED;
11811 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11814 /* Shift INSN to SLOT, and shift everything else down. */
11817 ix86_reorder_insn (insnp, slot)
11824 insnp[0] = insnp[1];
11825 while (++insnp != slot);
11831 ix86_sched_reorder_ppro (ready, e_ready)
11836 enum attr_ppro_uops cur_uops;
11837 int issued_this_cycle;
11841 /* At this point .ppro.decode contains the state of the three
11842 decoders from last "cycle". That is, those insns that were
11843 actually independent. But here we're scheduling for the
11844 decoder, and we may find things that are decodable in the
11847 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11848 issued_this_cycle = 0;
11851 cur_uops = ix86_safe_ppro_uops (*insnp);
11853 /* If the decoders are empty, and we've a complex insn at the
11854 head of the priority queue, let it issue without complaint. */
11855 if (decode[0] == NULL)
11857 if (cur_uops == PPRO_UOPS_MANY)
11859 decode[0] = *insnp;
11863 /* Otherwise, search for a 2-4 uop unsn to issue. */
11864 while (cur_uops != PPRO_UOPS_FEW)
11866 if (insnp == ready)
11868 cur_uops = ix86_safe_ppro_uops (*--insnp);
11871 /* If so, move it to the head of the line. */
11872 if (cur_uops == PPRO_UOPS_FEW)
11873 ix86_reorder_insn (insnp, e_ready);
11875 /* Issue the head of the queue. */
11876 issued_this_cycle = 1;
11877 decode[0] = *e_ready--;
11880 /* Look for simple insns to fill in the other two slots. */
11881 for (i = 1; i < 3; ++i)
11882 if (decode[i] == NULL)
11884 if (ready > e_ready)
11888 cur_uops = ix86_safe_ppro_uops (*insnp);
11889 while (cur_uops != PPRO_UOPS_ONE)
11891 if (insnp == ready)
11893 cur_uops = ix86_safe_ppro_uops (*--insnp);
11896 /* Found one. Move it to the head of the queue and issue it. */
11897 if (cur_uops == PPRO_UOPS_ONE)
11899 ix86_reorder_insn (insnp, e_ready);
11900 decode[i] = *e_ready--;
11901 issued_this_cycle++;
11905 /* ??? Didn't find one. Ideally, here we would do a lazy split
11906 of 2-uop insns, issue one and queue the other. */
11910 if (issued_this_cycle == 0)
11911 issued_this_cycle = 1;
11912 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11915 /* We are about to being issuing insns for this clock cycle.
11916 Override the default sort algorithm to better slot instructions. */
11918 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11919 FILE *dump ATTRIBUTE_UNUSED;
11920 int sched_verbose ATTRIBUTE_UNUSED;
11923 int clock_var ATTRIBUTE_UNUSED;
11925 int n_ready = *n_readyp;
11926 rtx *e_ready = ready + n_ready - 1;
11928 /* Make sure to go ahead and initialize key items in
11929 ix86_sched_data if we are not going to bother trying to
11930 reorder the ready queue. */
11933 ix86_sched_data.ppro.issued_this_cycle = 1;
11942 case PROCESSOR_PENTIUMPRO:
11943 ix86_sched_reorder_ppro (ready, e_ready);
11948 return ix86_issue_rate ();
11951 /* We are about to issue INSN. Return the number of insns left on the
11952 ready queue that can be issued this cycle. */
11955 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11959 int can_issue_more;
11965 return can_issue_more - 1;
11967 case PROCESSOR_PENTIUMPRO:
11969 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11971 if (uops == PPRO_UOPS_MANY)
11974 ix86_dump_ppro_packet (dump);
11975 ix86_sched_data.ppro.decode[0] = insn;
11976 ix86_sched_data.ppro.decode[1] = NULL;
11977 ix86_sched_data.ppro.decode[2] = NULL;
11979 ix86_dump_ppro_packet (dump);
11980 ix86_sched_data.ppro.decode[0] = NULL;
11982 else if (uops == PPRO_UOPS_FEW)
11985 ix86_dump_ppro_packet (dump);
11986 ix86_sched_data.ppro.decode[0] = insn;
11987 ix86_sched_data.ppro.decode[1] = NULL;
11988 ix86_sched_data.ppro.decode[2] = NULL;
11992 for (i = 0; i < 3; ++i)
11993 if (ix86_sched_data.ppro.decode[i] == NULL)
11995 ix86_sched_data.ppro.decode[i] = insn;
12003 ix86_dump_ppro_packet (dump);
12004 ix86_sched_data.ppro.decode[0] = NULL;
12005 ix86_sched_data.ppro.decode[1] = NULL;
12006 ix86_sched_data.ppro.decode[2] = NULL;
12010 return --ix86_sched_data.ppro.issued_this_cycle;
12015 ia32_use_dfa_pipeline_interface ()
12017 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12022 /* How many alternative schedules to try. This should be as wide as the
12023 scheduling freedom in the DFA, but no wider. Making this value too
12024 large results extra work for the scheduler. */
12027 ia32_multipass_dfa_lookahead ()
12029 if (ix86_cpu == PROCESSOR_PENTIUM)
12036 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12037 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12041 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12043 rtx dstref, srcref, dstreg, srcreg;
12047 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12049 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12053 /* Subroutine of above to actually do the updating by recursively walking
12057 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12059 rtx dstref, srcref, dstreg, srcreg;
12061 enum rtx_code code = GET_CODE (x);
12062 const char *format_ptr = GET_RTX_FORMAT (code);
12065 if (code == MEM && XEXP (x, 0) == dstreg)
12066 MEM_COPY_ATTRIBUTES (x, dstref);
12067 else if (code == MEM && XEXP (x, 0) == srcreg)
12068 MEM_COPY_ATTRIBUTES (x, srcref);
12070 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12072 if (*format_ptr == 'e')
12073 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12075 else if (*format_ptr == 'E')
12076 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12077 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12082 /* Compute the alignment given to a constant that is being placed in memory.
12083 EXP is the constant and ALIGN is the alignment that the object would
12085 The value of this function is used instead of that alignment to align
12089 ix86_constant_alignment (exp, align)
12093 if (TREE_CODE (exp) == REAL_CST)
12095 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12097 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12100 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12107 /* Compute the alignment for a static variable.
12108 TYPE is the data type, and ALIGN is the alignment that
12109 the object would ordinarily have. The value of this function is used
12110 instead of that alignment to align the object. */
12113 ix86_data_alignment (type, align)
12117 if (AGGREGATE_TYPE_P (type)
12118 && TYPE_SIZE (type)
12119 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12120 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12121 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12124 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12125 to 16byte boundary. */
12128 if (AGGREGATE_TYPE_P (type)
12129 && TYPE_SIZE (type)
12130 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12131 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12132 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12136 if (TREE_CODE (type) == ARRAY_TYPE)
12138 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12140 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12143 else if (TREE_CODE (type) == COMPLEX_TYPE)
12146 if (TYPE_MODE (type) == DCmode && align < 64)
12148 if (TYPE_MODE (type) == XCmode && align < 128)
12151 else if ((TREE_CODE (type) == RECORD_TYPE
12152 || TREE_CODE (type) == UNION_TYPE
12153 || TREE_CODE (type) == QUAL_UNION_TYPE)
12154 && TYPE_FIELDS (type))
12156 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12158 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12161 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12162 || TREE_CODE (type) == INTEGER_TYPE)
12164 if (TYPE_MODE (type) == DFmode && align < 64)
12166 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12173 /* Compute the alignment for a local variable.
12174 TYPE is the data type, and ALIGN is the alignment that
12175 the object would ordinarily have. The value of this macro is used
12176 instead of that alignment to align the object. */
12179 ix86_local_alignment (type, align)
12183 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12184 to 16byte boundary. */
12187 if (AGGREGATE_TYPE_P (type)
12188 && TYPE_SIZE (type)
12189 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12190 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12191 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12194 if (TREE_CODE (type) == ARRAY_TYPE)
12196 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12198 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12201 else if (TREE_CODE (type) == COMPLEX_TYPE)
12203 if (TYPE_MODE (type) == DCmode && align < 64)
12205 if (TYPE_MODE (type) == XCmode && align < 128)
12208 else if ((TREE_CODE (type) == RECORD_TYPE
12209 || TREE_CODE (type) == UNION_TYPE
12210 || TREE_CODE (type) == QUAL_UNION_TYPE)
12211 && TYPE_FIELDS (type))
12213 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12215 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12218 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12219 || TREE_CODE (type) == INTEGER_TYPE)
12222 if (TYPE_MODE (type) == DFmode && align < 64)
12224 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12230 /* Emit RTL insns to initialize the variable parts of a trampoline.
12231 FNADDR is an RTX for the address of the function's pure code.
12232 CXT is an RTX for the static chain value for the function. */
12234 x86_initialize_trampoline (tramp, fnaddr, cxt)
12235 rtx tramp, fnaddr, cxt;
12239 /* Compute offset from the end of the jmp to the target function. */
12240 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12241 plus_constant (tramp, 10),
12242 NULL_RTX, 1, OPTAB_DIRECT);
12243 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12244 gen_int_mode (0xb9, QImode));
12245 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12246 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12247 gen_int_mode (0xe9, QImode));
12248 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12253 /* Try to load address using shorter movl instead of movabs.
12254 We may want to support movq for kernel mode, but kernel does not use
12255 trampolines at the moment. */
12256 if (x86_64_zero_extended_value (fnaddr))
12258 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12259 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12260 gen_int_mode (0xbb41, HImode));
12261 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12262 gen_lowpart (SImode, fnaddr));
12267 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12268 gen_int_mode (0xbb49, HImode));
12269 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12273 /* Load static chain using movabs to r10. */
12274 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12275 gen_int_mode (0xba49, HImode));
12276 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12279 /* Jump to the r11 */
12280 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12281 gen_int_mode (0xff49, HImode));
12282 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12283 gen_int_mode (0xe3, QImode));
12285 if (offset > TRAMPOLINE_SIZE)
12289 #ifdef TRANSFER_FROM_TRAMPOLINE
12290 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12291 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12295 #define def_builtin(MASK, NAME, TYPE, CODE) \
12297 if ((MASK) & target_flags) \
12298 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12299 NULL, NULL_TREE); \
12302 struct builtin_description
12304 const unsigned int mask;
12305 const enum insn_code icode;
12306 const char *const name;
12307 const enum ix86_builtins code;
12308 const enum rtx_code comparison;
12309 const unsigned int flag;
12312 /* Used for builtins that are enabled both by -msse and -msse2. */
12313 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12315 static const struct builtin_description bdesc_comi[] =
12317 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12318 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12319 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12320 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12321 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12322 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12323 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12324 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12325 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12326 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12327 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12328 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12329 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12330 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12331 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12332 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12333 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12334 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12335 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12336 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12337 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12338 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12339 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12340 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12343 static const struct builtin_description bdesc_2arg[] =
12346 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12347 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12348 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12349 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12350 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12351 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12352 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12353 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12355 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12356 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12357 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12358 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12359 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12360 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12361 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12362 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12363 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12364 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12365 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12366 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12367 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12368 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12369 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12370 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12371 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12372 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12373 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12374 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12376 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12377 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12378 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12379 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12381 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12382 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12383 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12384 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12386 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12387 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12388 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12389 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12390 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12393 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12394 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12395 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12396 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12397 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12398 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12400 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12401 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12402 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12403 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12404 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12405 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12406 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12407 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12409 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12410 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12411 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12413 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12414 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12415 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12416 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12418 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12419 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12421 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12422 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12423 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12424 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12425 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12426 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12428 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12429 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12430 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12431 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12433 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12434 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12435 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12436 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12437 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12438 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12441 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12442 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12443 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12445 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12446 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12448 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12449 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12450 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12451 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12452 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12453 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12455 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12456 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12457 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12458 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12459 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12460 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12462 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12463 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12464 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12465 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12467 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12468 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12471 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12472 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12473 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12474 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12475 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12476 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12477 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12478 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12480 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12481 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12482 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12483 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12484 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12485 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12486 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12487 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12488 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12489 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12490 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12491 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12492 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12493 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12494 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12495 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12496 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12497 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12498 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12499 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12501 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12502 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12503 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12504 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12506 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12507 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12508 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12509 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12511 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12512 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12513 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12516 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12517 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12518 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12519 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12520 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12521 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12522 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12523 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12525 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12526 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12527 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12528 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12529 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12530 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12531 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12532 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12534 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12535 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12536 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12537 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12539 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12540 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12541 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12542 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12544 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12545 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12547 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12548 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12549 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12550 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12551 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12552 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12554 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12555 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12556 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12557 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12559 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12560 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12561 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12562 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12563 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12564 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12565 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12566 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12568 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12569 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12570 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12572 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12573 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12575 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12576 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12577 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12578 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12579 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12580 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12582 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12583 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12584 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12585 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12586 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12587 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12589 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12590 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12591 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12592 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12594 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12596 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12597 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12598 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12601 static const struct builtin_description bdesc_1arg[] =
12603 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12604 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12606 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12607 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12608 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12610 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12611 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12612 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12613 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12615 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12616 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12617 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12618 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12620 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12622 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12623 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12625 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12626 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12627 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12628 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12629 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12631 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12633 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12634 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12636 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12637 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12638 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12640 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12644 ix86_init_builtins ()
12647 ix86_init_mmx_sse_builtins ();
12650 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12651 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12654 ix86_init_mmx_sse_builtins ()
12656 const struct builtin_description * d;
12659 tree pchar_type_node = build_pointer_type (char_type_node);
12660 tree pcchar_type_node = build_pointer_type (
12661 build_type_variant (char_type_node, 1, 0));
12662 tree pfloat_type_node = build_pointer_type (float_type_node);
12663 tree pcfloat_type_node = build_pointer_type (
12664 build_type_variant (float_type_node, 1, 0));
12665 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12666 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12667 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12670 tree int_ftype_v4sf_v4sf
12671 = build_function_type_list (integer_type_node,
12672 V4SF_type_node, V4SF_type_node, NULL_TREE);
12673 tree v4si_ftype_v4sf_v4sf
12674 = build_function_type_list (V4SI_type_node,
12675 V4SF_type_node, V4SF_type_node, NULL_TREE);
12676 /* MMX/SSE/integer conversions. */
12677 tree int_ftype_v4sf
12678 = build_function_type_list (integer_type_node,
12679 V4SF_type_node, NULL_TREE);
12680 tree int_ftype_v8qi
12681 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12682 tree v4sf_ftype_v4sf_int
12683 = build_function_type_list (V4SF_type_node,
12684 V4SF_type_node, integer_type_node, NULL_TREE);
12685 tree v4sf_ftype_v4sf_v2si
12686 = build_function_type_list (V4SF_type_node,
12687 V4SF_type_node, V2SI_type_node, NULL_TREE);
12688 tree int_ftype_v4hi_int
12689 = build_function_type_list (integer_type_node,
12690 V4HI_type_node, integer_type_node, NULL_TREE);
12691 tree v4hi_ftype_v4hi_int_int
12692 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12693 integer_type_node, integer_type_node,
12695 /* Miscellaneous. */
12696 tree v8qi_ftype_v4hi_v4hi
12697 = build_function_type_list (V8QI_type_node,
12698 V4HI_type_node, V4HI_type_node, NULL_TREE);
12699 tree v4hi_ftype_v2si_v2si
12700 = build_function_type_list (V4HI_type_node,
12701 V2SI_type_node, V2SI_type_node, NULL_TREE);
12702 tree v4sf_ftype_v4sf_v4sf_int
12703 = build_function_type_list (V4SF_type_node,
12704 V4SF_type_node, V4SF_type_node,
12705 integer_type_node, NULL_TREE);
12706 tree v2si_ftype_v4hi_v4hi
12707 = build_function_type_list (V2SI_type_node,
12708 V4HI_type_node, V4HI_type_node, NULL_TREE);
12709 tree v4hi_ftype_v4hi_int
12710 = build_function_type_list (V4HI_type_node,
12711 V4HI_type_node, integer_type_node, NULL_TREE);
12712 tree v4hi_ftype_v4hi_di
12713 = build_function_type_list (V4HI_type_node,
12714 V4HI_type_node, long_long_unsigned_type_node,
12716 tree v2si_ftype_v2si_di
12717 = build_function_type_list (V2SI_type_node,
12718 V2SI_type_node, long_long_unsigned_type_node,
12720 tree void_ftype_void
12721 = build_function_type (void_type_node, void_list_node);
12722 tree void_ftype_unsigned
12723 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12724 tree unsigned_ftype_void
12725 = build_function_type (unsigned_type_node, void_list_node);
12727 = build_function_type (long_long_unsigned_type_node, void_list_node);
12728 tree v4sf_ftype_void
12729 = build_function_type (V4SF_type_node, void_list_node);
12730 tree v2si_ftype_v4sf
12731 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12732 /* Loads/stores. */
12733 tree void_ftype_v8qi_v8qi_pchar
12734 = build_function_type_list (void_type_node,
12735 V8QI_type_node, V8QI_type_node,
12736 pchar_type_node, NULL_TREE);
12737 tree v4sf_ftype_pcfloat
12738 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12739 /* @@@ the type is bogus */
12740 tree v4sf_ftype_v4sf_pv2si
12741 = build_function_type_list (V4SF_type_node,
12742 V4SF_type_node, pv2si_type_node, NULL_TREE);
12743 tree void_ftype_pv2si_v4sf
12744 = build_function_type_list (void_type_node,
12745 pv2si_type_node, V4SF_type_node, NULL_TREE);
12746 tree void_ftype_pfloat_v4sf
12747 = build_function_type_list (void_type_node,
12748 pfloat_type_node, V4SF_type_node, NULL_TREE);
12749 tree void_ftype_pdi_di
12750 = build_function_type_list (void_type_node,
12751 pdi_type_node, long_long_unsigned_type_node,
12753 tree void_ftype_pv2di_v2di
12754 = build_function_type_list (void_type_node,
12755 pv2di_type_node, V2DI_type_node, NULL_TREE);
12756 /* Normal vector unops. */
12757 tree v4sf_ftype_v4sf
12758 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12760 /* Normal vector binops. */
12761 tree v4sf_ftype_v4sf_v4sf
12762 = build_function_type_list (V4SF_type_node,
12763 V4SF_type_node, V4SF_type_node, NULL_TREE);
12764 tree v8qi_ftype_v8qi_v8qi
12765 = build_function_type_list (V8QI_type_node,
12766 V8QI_type_node, V8QI_type_node, NULL_TREE);
12767 tree v4hi_ftype_v4hi_v4hi
12768 = build_function_type_list (V4HI_type_node,
12769 V4HI_type_node, V4HI_type_node, NULL_TREE);
12770 tree v2si_ftype_v2si_v2si
12771 = build_function_type_list (V2SI_type_node,
12772 V2SI_type_node, V2SI_type_node, NULL_TREE);
12773 tree di_ftype_di_di
12774 = build_function_type_list (long_long_unsigned_type_node,
12775 long_long_unsigned_type_node,
12776 long_long_unsigned_type_node, NULL_TREE);
12778 tree v2si_ftype_v2sf
12779 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12780 tree v2sf_ftype_v2si
12781 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12782 tree v2si_ftype_v2si
12783 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12784 tree v2sf_ftype_v2sf
12785 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12786 tree v2sf_ftype_v2sf_v2sf
12787 = build_function_type_list (V2SF_type_node,
12788 V2SF_type_node, V2SF_type_node, NULL_TREE);
12789 tree v2si_ftype_v2sf_v2sf
12790 = build_function_type_list (V2SI_type_node,
12791 V2SF_type_node, V2SF_type_node, NULL_TREE);
12792 tree pint_type_node = build_pointer_type (integer_type_node);
12793 tree pcint_type_node = build_pointer_type (
12794 build_type_variant (integer_type_node, 1, 0));
12795 tree pdouble_type_node = build_pointer_type (double_type_node);
12796 tree pcdouble_type_node = build_pointer_type (
12797 build_type_variant (double_type_node, 1, 0));
12798 tree int_ftype_v2df_v2df
12799 = build_function_type_list (integer_type_node,
12800 V2DF_type_node, V2DF_type_node, NULL_TREE);
12803 = build_function_type (intTI_type_node, void_list_node);
12804 tree v2di_ftype_void
12805 = build_function_type (V2DI_type_node, void_list_node);
12806 tree ti_ftype_ti_ti
12807 = build_function_type_list (intTI_type_node,
12808 intTI_type_node, intTI_type_node, NULL_TREE);
12809 tree void_ftype_pcvoid
12810 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12812 = build_function_type_list (V2DI_type_node,
12813 long_long_unsigned_type_node, NULL_TREE);
12815 = build_function_type_list (long_long_unsigned_type_node,
12816 V2DI_type_node, NULL_TREE);
12817 tree v4sf_ftype_v4si
12818 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12819 tree v4si_ftype_v4sf
12820 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12821 tree v2df_ftype_v4si
12822 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12823 tree v4si_ftype_v2df
12824 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12825 tree v2si_ftype_v2df
12826 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12827 tree v4sf_ftype_v2df
12828 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12829 tree v2df_ftype_v2si
12830 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12831 tree v2df_ftype_v4sf
12832 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12833 tree int_ftype_v2df
12834 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12835 tree v2df_ftype_v2df_int
12836 = build_function_type_list (V2DF_type_node,
12837 V2DF_type_node, integer_type_node, NULL_TREE);
12838 tree v4sf_ftype_v4sf_v2df
12839 = build_function_type_list (V4SF_type_node,
12840 V4SF_type_node, V2DF_type_node, NULL_TREE);
12841 tree v2df_ftype_v2df_v4sf
12842 = build_function_type_list (V2DF_type_node,
12843 V2DF_type_node, V4SF_type_node, NULL_TREE);
12844 tree v2df_ftype_v2df_v2df_int
12845 = build_function_type_list (V2DF_type_node,
12846 V2DF_type_node, V2DF_type_node,
12849 tree v2df_ftype_v2df_pv2si
12850 = build_function_type_list (V2DF_type_node,
12851 V2DF_type_node, pv2si_type_node, NULL_TREE);
12852 tree void_ftype_pv2si_v2df
12853 = build_function_type_list (void_type_node,
12854 pv2si_type_node, V2DF_type_node, NULL_TREE);
12855 tree void_ftype_pdouble_v2df
12856 = build_function_type_list (void_type_node,
12857 pdouble_type_node, V2DF_type_node, NULL_TREE);
12858 tree void_ftype_pint_int
12859 = build_function_type_list (void_type_node,
12860 pint_type_node, integer_type_node, NULL_TREE);
12861 tree void_ftype_v16qi_v16qi_pchar
12862 = build_function_type_list (void_type_node,
12863 V16QI_type_node, V16QI_type_node,
12864 pchar_type_node, NULL_TREE);
12865 tree v2df_ftype_pcdouble
12866 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12867 tree v2df_ftype_v2df_v2df
12868 = build_function_type_list (V2DF_type_node,
12869 V2DF_type_node, V2DF_type_node, NULL_TREE);
12870 tree v16qi_ftype_v16qi_v16qi
12871 = build_function_type_list (V16QI_type_node,
12872 V16QI_type_node, V16QI_type_node, NULL_TREE);
12873 tree v8hi_ftype_v8hi_v8hi
12874 = build_function_type_list (V8HI_type_node,
12875 V8HI_type_node, V8HI_type_node, NULL_TREE);
12876 tree v4si_ftype_v4si_v4si
12877 = build_function_type_list (V4SI_type_node,
12878 V4SI_type_node, V4SI_type_node, NULL_TREE);
12879 tree v2di_ftype_v2di_v2di
12880 = build_function_type_list (V2DI_type_node,
12881 V2DI_type_node, V2DI_type_node, NULL_TREE);
12882 tree v2di_ftype_v2df_v2df
12883 = build_function_type_list (V2DI_type_node,
12884 V2DF_type_node, V2DF_type_node, NULL_TREE);
12885 tree v2df_ftype_v2df
12886 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12887 tree v2df_ftype_double
12888 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12889 tree v2df_ftype_double_double
12890 = build_function_type_list (V2DF_type_node,
12891 double_type_node, double_type_node, NULL_TREE);
12892 tree int_ftype_v8hi_int
12893 = build_function_type_list (integer_type_node,
12894 V8HI_type_node, integer_type_node, NULL_TREE);
12895 tree v8hi_ftype_v8hi_int_int
12896 = build_function_type_list (V8HI_type_node,
12897 V8HI_type_node, integer_type_node,
12898 integer_type_node, NULL_TREE);
12899 tree v2di_ftype_v2di_int
12900 = build_function_type_list (V2DI_type_node,
12901 V2DI_type_node, integer_type_node, NULL_TREE);
12902 tree v4si_ftype_v4si_int
12903 = build_function_type_list (V4SI_type_node,
12904 V4SI_type_node, integer_type_node, NULL_TREE);
12905 tree v8hi_ftype_v8hi_int
12906 = build_function_type_list (V8HI_type_node,
12907 V8HI_type_node, integer_type_node, NULL_TREE);
12908 tree v8hi_ftype_v8hi_v2di
12909 = build_function_type_list (V8HI_type_node,
12910 V8HI_type_node, V2DI_type_node, NULL_TREE);
12911 tree v4si_ftype_v4si_v2di
12912 = build_function_type_list (V4SI_type_node,
12913 V4SI_type_node, V2DI_type_node, NULL_TREE);
12914 tree v4si_ftype_v8hi_v8hi
12915 = build_function_type_list (V4SI_type_node,
12916 V8HI_type_node, V8HI_type_node, NULL_TREE);
12917 tree di_ftype_v8qi_v8qi
12918 = build_function_type_list (long_long_unsigned_type_node,
12919 V8QI_type_node, V8QI_type_node, NULL_TREE);
12920 tree v2di_ftype_v16qi_v16qi
12921 = build_function_type_list (V2DI_type_node,
12922 V16QI_type_node, V16QI_type_node, NULL_TREE);
12923 tree int_ftype_v16qi
12924 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12925 tree v16qi_ftype_pcchar
12926 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12927 tree void_ftype_pchar_v16qi
12928 = build_function_type_list (void_type_node,
12929 pchar_type_node, V16QI_type_node, NULL_TREE);
12930 tree v4si_ftype_pcint
12931 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12932 tree void_ftype_pcint_v4si
12933 = build_function_type_list (void_type_node,
12934 pcint_type_node, V4SI_type_node, NULL_TREE);
12935 tree v2di_ftype_v2di
12936 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12938 /* Add all builtins that are more or less simple operations on two
12940 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12942 /* Use one of the operands; the target can have a different mode for
12943 mask-generating compares. */
12944 enum machine_mode mode;
12949 mode = insn_data[d->icode].operand[1].mode;
12954 type = v16qi_ftype_v16qi_v16qi;
12957 type = v8hi_ftype_v8hi_v8hi;
12960 type = v4si_ftype_v4si_v4si;
12963 type = v2di_ftype_v2di_v2di;
12966 type = v2df_ftype_v2df_v2df;
12969 type = ti_ftype_ti_ti;
12972 type = v4sf_ftype_v4sf_v4sf;
12975 type = v8qi_ftype_v8qi_v8qi;
12978 type = v4hi_ftype_v4hi_v4hi;
12981 type = v2si_ftype_v2si_v2si;
12984 type = di_ftype_di_di;
12991 /* Override for comparisons. */
12992 if (d->icode == CODE_FOR_maskcmpv4sf3
12993 || d->icode == CODE_FOR_maskncmpv4sf3
12994 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12995 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12996 type = v4si_ftype_v4sf_v4sf;
12998 if (d->icode == CODE_FOR_maskcmpv2df3
12999 || d->icode == CODE_FOR_maskncmpv2df3
13000 || d->icode == CODE_FOR_vmmaskcmpv2df3
13001 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13002 type = v2di_ftype_v2df_v2df;
13004 def_builtin (d->mask, d->name, type, d->code);
13007 /* Add the remaining MMX insns with somewhat more complicated types. */
13008 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13009 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13010 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13011 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13012 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13014 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13015 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13016 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13018 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13019 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13021 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13022 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13024 /* comi/ucomi insns. */
13025 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13026 if (d->mask == MASK_SSE2)
13027 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13029 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13031 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13032 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13033 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13035 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13036 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13037 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13038 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13039 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13040 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13041 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13042 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13044 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13045 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13047 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13049 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13050 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13051 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13052 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13053 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13054 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13056 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13057 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13058 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13059 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13061 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13062 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13063 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13064 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13066 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13068 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13070 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13071 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13072 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13073 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13074 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13075 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13077 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13079 /* Original 3DNow! */
13080 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13081 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13082 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13083 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13084 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13085 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13086 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13087 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13088 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13089 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13090 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13091 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13092 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13093 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13094 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13095 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13096 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13097 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13098 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13099 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13101 /* 3DNow! extension as used in the Athlon CPU. */
13102 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13103 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13104 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13105 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13106 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13107 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13109 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13112 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13113 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13115 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13116 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13117 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13119 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13120 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13121 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13122 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13123 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13124 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13126 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13127 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13128 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13129 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13131 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13132 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13133 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13134 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13135 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13137 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13138 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13139 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13140 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13142 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13143 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13145 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13147 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13148 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13150 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13151 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13152 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13153 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13154 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13156 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13158 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13159 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13161 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13162 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13163 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13165 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13166 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13167 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13169 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13170 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13171 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13172 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13173 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13174 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13175 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13177 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13178 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13179 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13181 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13182 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13183 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13184 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13185 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13186 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13187 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13189 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13191 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13192 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13193 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13195 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13196 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13197 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13199 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13200 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13202 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13203 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13204 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13205 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13207 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13208 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13209 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13210 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13212 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13213 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13215 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13218 /* Errors in the source file can cause expand_expr to return const0_rtx
13219 where we expect a vector. To avoid crashing, use one of the vector
13220 clear instructions. */
13222 safe_vector_operand (x, mode)
13224 enum machine_mode mode;
13226 if (x != const0_rtx)
13228 x = gen_reg_rtx (mode);
13230 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13231 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13232 : gen_rtx_SUBREG (DImode, x, 0)));
13234 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13235 : gen_rtx_SUBREG (V4SFmode, x, 0),
13236 CONST0_RTX (V4SFmode)));
13240 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13243 ix86_expand_binop_builtin (icode, arglist, target)
13244 enum insn_code icode;
13249 tree arg0 = TREE_VALUE (arglist);
13250 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13251 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13252 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13253 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13254 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13255 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13257 if (VECTOR_MODE_P (mode0))
13258 op0 = safe_vector_operand (op0, mode0);
13259 if (VECTOR_MODE_P (mode1))
13260 op1 = safe_vector_operand (op1, mode1);
13263 || GET_MODE (target) != tmode
13264 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13265 target = gen_reg_rtx (tmode);
13267 /* In case the insn wants input operands in modes different from
13268 the result, abort. */
13269 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13272 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13273 op0 = copy_to_mode_reg (mode0, op0);
13274 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13275 op1 = copy_to_mode_reg (mode1, op1);
13277 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13278 yet one of the two must not be a memory. This is normally enforced
13279 by expanders, but we didn't bother to create one here. */
13280 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13281 op0 = copy_to_mode_reg (mode0, op0);
13283 pat = GEN_FCN (icode) (target, op0, op1);
13290 /* Subroutine of ix86_expand_builtin to take care of stores. */
13293 ix86_expand_store_builtin (icode, arglist)
13294 enum insn_code icode;
13298 tree arg0 = TREE_VALUE (arglist);
13299 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13300 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13301 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13302 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13303 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13305 if (VECTOR_MODE_P (mode1))
13306 op1 = safe_vector_operand (op1, mode1);
13308 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13310 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13311 op1 = copy_to_mode_reg (mode1, op1);
13313 pat = GEN_FCN (icode) (op0, op1);
13319 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13322 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13323 enum insn_code icode;
13329 tree arg0 = TREE_VALUE (arglist);
13330 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13331 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13332 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13335 || GET_MODE (target) != tmode
13336 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13337 target = gen_reg_rtx (tmode);
13339 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13342 if (VECTOR_MODE_P (mode0))
13343 op0 = safe_vector_operand (op0, mode0);
13345 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13346 op0 = copy_to_mode_reg (mode0, op0);
13349 pat = GEN_FCN (icode) (target, op0);
13356 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13357 sqrtss, rsqrtss, rcpss. */
13360 ix86_expand_unop1_builtin (icode, arglist, target)
13361 enum insn_code icode;
13366 tree arg0 = TREE_VALUE (arglist);
13367 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13368 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13369 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13372 || GET_MODE (target) != tmode
13373 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13374 target = gen_reg_rtx (tmode);
13376 if (VECTOR_MODE_P (mode0))
13377 op0 = safe_vector_operand (op0, mode0);
13379 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13380 op0 = copy_to_mode_reg (mode0, op0);
13383 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13384 op1 = copy_to_mode_reg (mode0, op1);
13386 pat = GEN_FCN (icode) (target, op0, op1);
13393 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13396 ix86_expand_sse_compare (d, arglist, target)
13397 const struct builtin_description *d;
13402 tree arg0 = TREE_VALUE (arglist);
13403 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13404 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13405 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13407 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13408 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13409 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13410 enum rtx_code comparison = d->comparison;
13412 if (VECTOR_MODE_P (mode0))
13413 op0 = safe_vector_operand (op0, mode0);
13414 if (VECTOR_MODE_P (mode1))
13415 op1 = safe_vector_operand (op1, mode1);
13417 /* Swap operands if we have a comparison that isn't available in
13421 rtx tmp = gen_reg_rtx (mode1);
13422 emit_move_insn (tmp, op1);
13428 || GET_MODE (target) != tmode
13429 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13430 target = gen_reg_rtx (tmode);
13432 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13433 op0 = copy_to_mode_reg (mode0, op0);
13434 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13435 op1 = copy_to_mode_reg (mode1, op1);
13437 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13438 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13445 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13448 ix86_expand_sse_comi (d, arglist, target)
13449 const struct builtin_description *d;
13454 tree arg0 = TREE_VALUE (arglist);
13455 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13456 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13457 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13459 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13460 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13461 enum rtx_code comparison = d->comparison;
13463 if (VECTOR_MODE_P (mode0))
13464 op0 = safe_vector_operand (op0, mode0);
13465 if (VECTOR_MODE_P (mode1))
13466 op1 = safe_vector_operand (op1, mode1);
13468 /* Swap operands if we have a comparison that isn't available in
13477 target = gen_reg_rtx (SImode);
13478 emit_move_insn (target, const0_rtx);
13479 target = gen_rtx_SUBREG (QImode, target, 0);
13481 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13482 op0 = copy_to_mode_reg (mode0, op0);
13483 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13484 op1 = copy_to_mode_reg (mode1, op1);
13486 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13487 pat = GEN_FCN (d->icode) (op0, op1);
13491 emit_insn (gen_rtx_SET (VOIDmode,
13492 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13493 gen_rtx_fmt_ee (comparison, QImode,
13497 return SUBREG_REG (target);
13500 /* Expand an expression EXP that calls a built-in function,
13501 with result going to TARGET if that's convenient
13502 (and in mode MODE if that's convenient).
13503 SUBTARGET may be used as the target for computing one of EXP's operands.
13504 IGNORE is nonzero if the value is to be ignored. */
13507 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13510 rtx subtarget ATTRIBUTE_UNUSED;
13511 enum machine_mode mode ATTRIBUTE_UNUSED;
13512 int ignore ATTRIBUTE_UNUSED;
13514 const struct builtin_description *d;
13516 enum insn_code icode;
13517 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13518 tree arglist = TREE_OPERAND (exp, 1);
13519 tree arg0, arg1, arg2;
13520 rtx op0, op1, op2, pat;
13521 enum machine_mode tmode, mode0, mode1, mode2;
13522 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13526 case IX86_BUILTIN_EMMS:
13527 emit_insn (gen_emms ());
13530 case IX86_BUILTIN_SFENCE:
13531 emit_insn (gen_sfence ());
13534 case IX86_BUILTIN_PEXTRW:
13535 case IX86_BUILTIN_PEXTRW128:
13536 icode = (fcode == IX86_BUILTIN_PEXTRW
13537 ? CODE_FOR_mmx_pextrw
13538 : CODE_FOR_sse2_pextrw);
13539 arg0 = TREE_VALUE (arglist);
13540 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13541 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13542 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13543 tmode = insn_data[icode].operand[0].mode;
13544 mode0 = insn_data[icode].operand[1].mode;
13545 mode1 = insn_data[icode].operand[2].mode;
13547 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13548 op0 = copy_to_mode_reg (mode0, op0);
13549 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13551 /* @@@ better error message */
13552 error ("selector must be an immediate");
13553 return gen_reg_rtx (tmode);
13556 || GET_MODE (target) != tmode
13557 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13558 target = gen_reg_rtx (tmode);
13559 pat = GEN_FCN (icode) (target, op0, op1);
13565 case IX86_BUILTIN_PINSRW:
13566 case IX86_BUILTIN_PINSRW128:
13567 icode = (fcode == IX86_BUILTIN_PINSRW
13568 ? CODE_FOR_mmx_pinsrw
13569 : CODE_FOR_sse2_pinsrw);
13570 arg0 = TREE_VALUE (arglist);
13571 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13572 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13573 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13574 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13575 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13576 tmode = insn_data[icode].operand[0].mode;
13577 mode0 = insn_data[icode].operand[1].mode;
13578 mode1 = insn_data[icode].operand[2].mode;
13579 mode2 = insn_data[icode].operand[3].mode;
13581 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13582 op0 = copy_to_mode_reg (mode0, op0);
13583 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13584 op1 = copy_to_mode_reg (mode1, op1);
13585 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13587 /* @@@ better error message */
13588 error ("selector must be an immediate");
13592 || GET_MODE (target) != tmode
13593 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13594 target = gen_reg_rtx (tmode);
13595 pat = GEN_FCN (icode) (target, op0, op1, op2);
13601 case IX86_BUILTIN_MASKMOVQ:
13602 case IX86_BUILTIN_MASKMOVDQU:
13603 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13604 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13605 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13606 : CODE_FOR_sse2_maskmovdqu));
13607 /* Note the arg order is different from the operand order. */
13608 arg1 = TREE_VALUE (arglist);
13609 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13610 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13611 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13612 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13613 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13614 mode0 = insn_data[icode].operand[0].mode;
13615 mode1 = insn_data[icode].operand[1].mode;
13616 mode2 = insn_data[icode].operand[2].mode;
13618 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13619 op0 = copy_to_mode_reg (mode0, op0);
13620 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13621 op1 = copy_to_mode_reg (mode1, op1);
13622 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13623 op2 = copy_to_mode_reg (mode2, op2);
13624 pat = GEN_FCN (icode) (op0, op1, op2);
13630 case IX86_BUILTIN_SQRTSS:
13631 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13632 case IX86_BUILTIN_RSQRTSS:
13633 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13634 case IX86_BUILTIN_RCPSS:
13635 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13637 case IX86_BUILTIN_LOADAPS:
13638 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13640 case IX86_BUILTIN_LOADUPS:
13641 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13643 case IX86_BUILTIN_STOREAPS:
13644 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13646 case IX86_BUILTIN_STOREUPS:
13647 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13649 case IX86_BUILTIN_LOADSS:
13650 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13652 case IX86_BUILTIN_STORESS:
13653 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13655 case IX86_BUILTIN_LOADHPS:
13656 case IX86_BUILTIN_LOADLPS:
13657 case IX86_BUILTIN_LOADHPD:
13658 case IX86_BUILTIN_LOADLPD:
13659 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13660 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13661 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13662 : CODE_FOR_sse2_movlpd);
13663 arg0 = TREE_VALUE (arglist);
13664 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13665 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13666 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13667 tmode = insn_data[icode].operand[0].mode;
13668 mode0 = insn_data[icode].operand[1].mode;
13669 mode1 = insn_data[icode].operand[2].mode;
13671 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13672 op0 = copy_to_mode_reg (mode0, op0);
13673 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13675 || GET_MODE (target) != tmode
13676 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13677 target = gen_reg_rtx (tmode);
13678 pat = GEN_FCN (icode) (target, op0, op1);
13684 case IX86_BUILTIN_STOREHPS:
13685 case IX86_BUILTIN_STORELPS:
13686 case IX86_BUILTIN_STOREHPD:
13687 case IX86_BUILTIN_STORELPD:
13688 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13689 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13690 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13691 : CODE_FOR_sse2_movlpd);
13692 arg0 = TREE_VALUE (arglist);
13693 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13694 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13695 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13696 mode0 = insn_data[icode].operand[1].mode;
13697 mode1 = insn_data[icode].operand[2].mode;
13699 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13700 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13701 op1 = copy_to_mode_reg (mode1, op1);
13703 pat = GEN_FCN (icode) (op0, op0, op1);
13709 case IX86_BUILTIN_MOVNTPS:
13710 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13711 case IX86_BUILTIN_MOVNTQ:
13712 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13714 case IX86_BUILTIN_LDMXCSR:
13715 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13716 target = assign_386_stack_local (SImode, 0);
13717 emit_move_insn (target, op0);
13718 emit_insn (gen_ldmxcsr (target));
13721 case IX86_BUILTIN_STMXCSR:
13722 target = assign_386_stack_local (SImode, 0);
13723 emit_insn (gen_stmxcsr (target));
13724 return copy_to_mode_reg (SImode, target);
13726 case IX86_BUILTIN_SHUFPS:
13727 case IX86_BUILTIN_SHUFPD:
13728 icode = (fcode == IX86_BUILTIN_SHUFPS
13729 ? CODE_FOR_sse_shufps
13730 : CODE_FOR_sse2_shufpd);
13731 arg0 = TREE_VALUE (arglist);
13732 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13733 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13734 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13735 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13736 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13737 tmode = insn_data[icode].operand[0].mode;
13738 mode0 = insn_data[icode].operand[1].mode;
13739 mode1 = insn_data[icode].operand[2].mode;
13740 mode2 = insn_data[icode].operand[3].mode;
13742 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13743 op0 = copy_to_mode_reg (mode0, op0);
13744 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13745 op1 = copy_to_mode_reg (mode1, op1);
13746 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13748 /* @@@ better error message */
13749 error ("mask must be an immediate");
13750 return gen_reg_rtx (tmode);
13753 || GET_MODE (target) != tmode
13754 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13755 target = gen_reg_rtx (tmode);
13756 pat = GEN_FCN (icode) (target, op0, op1, op2);
13762 case IX86_BUILTIN_PSHUFW:
13763 case IX86_BUILTIN_PSHUFD:
13764 case IX86_BUILTIN_PSHUFHW:
13765 case IX86_BUILTIN_PSHUFLW:
13766 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13767 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13768 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13769 : CODE_FOR_mmx_pshufw);
13770 arg0 = TREE_VALUE (arglist);
13771 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13772 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13773 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13774 tmode = insn_data[icode].operand[0].mode;
13775 mode1 = insn_data[icode].operand[1].mode;
13776 mode2 = insn_data[icode].operand[2].mode;
13778 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13779 op0 = copy_to_mode_reg (mode1, op0);
13780 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13782 /* @@@ better error message */
13783 error ("mask must be an immediate");
13787 || GET_MODE (target) != tmode
13788 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13789 target = gen_reg_rtx (tmode);
13790 pat = GEN_FCN (icode) (target, op0, op1);
13796 case IX86_BUILTIN_PSLLDQI128:
13797 case IX86_BUILTIN_PSRLDQI128:
13798 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13799 : CODE_FOR_sse2_lshrti3);
13800 arg0 = TREE_VALUE (arglist);
13801 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13802 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13803 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13804 tmode = insn_data[icode].operand[0].mode;
13805 mode1 = insn_data[icode].operand[1].mode;
13806 mode2 = insn_data[icode].operand[2].mode;
13808 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13810 op0 = copy_to_reg (op0);
13811 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13813 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13815 error ("shift must be an immediate");
13818 target = gen_reg_rtx (V2DImode);
13819 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13825 case IX86_BUILTIN_FEMMS:
13826 emit_insn (gen_femms ());
13829 case IX86_BUILTIN_PAVGUSB:
13830 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13832 case IX86_BUILTIN_PF2ID:
13833 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13835 case IX86_BUILTIN_PFACC:
13836 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13838 case IX86_BUILTIN_PFADD:
13839 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13841 case IX86_BUILTIN_PFCMPEQ:
13842 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13844 case IX86_BUILTIN_PFCMPGE:
13845 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13847 case IX86_BUILTIN_PFCMPGT:
13848 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13850 case IX86_BUILTIN_PFMAX:
13851 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13853 case IX86_BUILTIN_PFMIN:
13854 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13856 case IX86_BUILTIN_PFMUL:
13857 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13859 case IX86_BUILTIN_PFRCP:
13860 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13862 case IX86_BUILTIN_PFRCPIT1:
13863 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13865 case IX86_BUILTIN_PFRCPIT2:
13866 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13868 case IX86_BUILTIN_PFRSQIT1:
13869 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13871 case IX86_BUILTIN_PFRSQRT:
13872 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13874 case IX86_BUILTIN_PFSUB:
13875 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13877 case IX86_BUILTIN_PFSUBR:
13878 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13880 case IX86_BUILTIN_PI2FD:
13881 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13883 case IX86_BUILTIN_PMULHRW:
13884 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13886 case IX86_BUILTIN_PF2IW:
13887 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13889 case IX86_BUILTIN_PFNACC:
13890 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13892 case IX86_BUILTIN_PFPNACC:
13893 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13895 case IX86_BUILTIN_PI2FW:
13896 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13898 case IX86_BUILTIN_PSWAPDSI:
13899 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13901 case IX86_BUILTIN_PSWAPDSF:
13902 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13904 case IX86_BUILTIN_SSE_ZERO:
13905 target = gen_reg_rtx (V4SFmode);
13906 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13909 case IX86_BUILTIN_MMX_ZERO:
13910 target = gen_reg_rtx (DImode);
13911 emit_insn (gen_mmx_clrdi (target));
13914 case IX86_BUILTIN_CLRTI:
13915 target = gen_reg_rtx (V2DImode);
13916 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13920 case IX86_BUILTIN_SQRTSD:
13921 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13922 case IX86_BUILTIN_LOADAPD:
13923 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13924 case IX86_BUILTIN_LOADUPD:
13925 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13927 case IX86_BUILTIN_STOREAPD:
13928 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13929 case IX86_BUILTIN_STOREUPD:
13930 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13932 case IX86_BUILTIN_LOADSD:
13933 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13935 case IX86_BUILTIN_STORESD:
13936 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13938 case IX86_BUILTIN_SETPD1:
13939 target = assign_386_stack_local (DFmode, 0);
13940 arg0 = TREE_VALUE (arglist);
13941 emit_move_insn (adjust_address (target, DFmode, 0),
13942 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13943 op0 = gen_reg_rtx (V2DFmode);
13944 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13945 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13948 case IX86_BUILTIN_SETPD:
13949 target = assign_386_stack_local (V2DFmode, 0);
13950 arg0 = TREE_VALUE (arglist);
13951 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13952 emit_move_insn (adjust_address (target, DFmode, 0),
13953 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13954 emit_move_insn (adjust_address (target, DFmode, 8),
13955 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13956 op0 = gen_reg_rtx (V2DFmode);
13957 emit_insn (gen_sse2_movapd (op0, target));
13960 case IX86_BUILTIN_LOADRPD:
13961 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13962 gen_reg_rtx (V2DFmode), 1);
13963 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13966 case IX86_BUILTIN_LOADPD1:
13967 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13968 gen_reg_rtx (V2DFmode), 1);
13969 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13972 case IX86_BUILTIN_STOREPD1:
13973 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13974 case IX86_BUILTIN_STORERPD:
13975 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13977 case IX86_BUILTIN_CLRPD:
13978 target = gen_reg_rtx (V2DFmode);
13979 emit_insn (gen_sse_clrv2df (target));
13982 case IX86_BUILTIN_MFENCE:
13983 emit_insn (gen_sse2_mfence ());
13985 case IX86_BUILTIN_LFENCE:
13986 emit_insn (gen_sse2_lfence ());
13989 case IX86_BUILTIN_CLFLUSH:
13990 arg0 = TREE_VALUE (arglist);
13991 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13992 icode = CODE_FOR_sse2_clflush;
13993 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13994 op0 = copy_to_mode_reg (Pmode, op0);
13996 emit_insn (gen_sse2_clflush (op0));
13999 case IX86_BUILTIN_MOVNTPD:
14000 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14001 case IX86_BUILTIN_MOVNTDQ:
14002 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14003 case IX86_BUILTIN_MOVNTI:
14004 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14006 case IX86_BUILTIN_LOADDQA:
14007 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14008 case IX86_BUILTIN_LOADDQU:
14009 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14010 case IX86_BUILTIN_LOADD:
14011 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14013 case IX86_BUILTIN_STOREDQA:
14014 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14015 case IX86_BUILTIN_STOREDQU:
14016 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14017 case IX86_BUILTIN_STORED:
14018 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14024 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14025 if (d->code == fcode)
14027 /* Compares are treated specially. */
14028 if (d->icode == CODE_FOR_maskcmpv4sf3
14029 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14030 || d->icode == CODE_FOR_maskncmpv4sf3
14031 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14032 || d->icode == CODE_FOR_maskcmpv2df3
14033 || d->icode == CODE_FOR_vmmaskcmpv2df3
14034 || d->icode == CODE_FOR_maskncmpv2df3
14035 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14036 return ix86_expand_sse_compare (d, arglist, target);
14038 return ix86_expand_binop_builtin (d->icode, arglist, target);
14041 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14042 if (d->code == fcode)
14043 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14045 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14046 if (d->code == fcode)
14047 return ix86_expand_sse_comi (d, arglist, target);
14049 /* @@@ Should really do something sensible here. */
14053 /* Store OPERAND to the memory after reload is completed. This means
14054 that we can't easily use assign_stack_local. */
14056 ix86_force_to_memory (mode, operand)
14057 enum machine_mode mode;
14061 if (!reload_completed)
14063 if (TARGET_64BIT && TARGET_RED_ZONE)
14065 result = gen_rtx_MEM (mode,
14066 gen_rtx_PLUS (Pmode,
14068 GEN_INT (-RED_ZONE_SIZE)));
14069 emit_move_insn (result, operand);
14071 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14077 operand = gen_lowpart (DImode, operand);
14081 gen_rtx_SET (VOIDmode,
14082 gen_rtx_MEM (DImode,
14083 gen_rtx_PRE_DEC (DImode,
14084 stack_pointer_rtx)),
14090 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14099 split_di (&operand, 1, operands, operands + 1);
14101 gen_rtx_SET (VOIDmode,
14102 gen_rtx_MEM (SImode,
14103 gen_rtx_PRE_DEC (Pmode,
14104 stack_pointer_rtx)),
14107 gen_rtx_SET (VOIDmode,
14108 gen_rtx_MEM (SImode,
14109 gen_rtx_PRE_DEC (Pmode,
14110 stack_pointer_rtx)),
14115 /* It is better to store HImodes as SImodes. */
14116 if (!TARGET_PARTIAL_REG_STALL)
14117 operand = gen_lowpart (SImode, operand);
14121 gen_rtx_SET (VOIDmode,
14122 gen_rtx_MEM (GET_MODE (operand),
14123 gen_rtx_PRE_DEC (SImode,
14124 stack_pointer_rtx)),
14130 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14135 /* Free operand from the memory. */
14137 ix86_free_from_memory (mode)
14138 enum machine_mode mode;
14140 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14144 if (mode == DImode || TARGET_64BIT)
14146 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14150 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14151 to pop or add instruction if registers are available. */
14152 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14153 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14158 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14159 QImode must go into class Q_REGS.
14160 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14161 movdf to do mem-to-mem moves through integer regs. */
14163 ix86_preferred_reload_class (x, class)
14165 enum reg_class class;
14167 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14169 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14171 /* SSE can't load any constant directly yet. */
14172 if (SSE_CLASS_P (class))
14174 /* Floats can load 0 and 1. */
14175 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14177 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14178 if (MAYBE_SSE_CLASS_P (class))
14179 return (reg_class_subset_p (class, GENERAL_REGS)
14180 ? GENERAL_REGS : FLOAT_REGS);
14184 /* General regs can load everything. */
14185 if (reg_class_subset_p (class, GENERAL_REGS))
14186 return GENERAL_REGS;
14187 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14188 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14191 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14193 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14198 /* If we are copying between general and FP registers, we need a memory
14199 location. The same is true for SSE and MMX registers.
14201 The macro can't work reliably when one of the CLASSES is class containing
14202 registers from multiple units (SSE, MMX, integer). We avoid this by never
14203 combining those units in single alternative in the machine description.
14204 Ensure that this constraint holds to avoid unexpected surprises.
14206 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14207 enforce these sanity checks. */
14209 ix86_secondary_memory_needed (class1, class2, mode, strict)
14210 enum reg_class class1, class2;
14211 enum machine_mode mode;
14214 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14215 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14216 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14217 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14218 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14219 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14226 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14227 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14228 && (mode) != SImode)
14229 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14230 && (mode) != SImode));
14232 /* Return the cost of moving data from a register in class CLASS1 to
14233 one in class CLASS2.
14235 It is not required that the cost always equal 2 when FROM is the same as TO;
14236 on some machines it is expensive to move between registers if they are not
14237 general registers. */
14239 ix86_register_move_cost (mode, class1, class2)
14240 enum machine_mode mode;
14241 enum reg_class class1, class2;
14243 /* In case we require secondary memory, compute cost of the store followed
14244 by load. In order to avoid bad register allocation choices, we need
14245 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14247 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14251 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14252 MEMORY_MOVE_COST (mode, class1, 1));
14253 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14254 MEMORY_MOVE_COST (mode, class2, 1));
14256 /* In case of copying from general_purpose_register we may emit multiple
14257 stores followed by single load causing memory size mismatch stall.
14258 Count this as arbitrarily high cost of 20. */
14259 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14262 /* In the case of FP/MMX moves, the registers actually overlap, and we
14263 have to switch modes in order to treat them differently. */
14264 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14265 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14271 /* Moves between SSE/MMX and integer unit are expensive. */
14272 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14273 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14274 return ix86_cost->mmxsse_to_integer;
14275 if (MAYBE_FLOAT_CLASS_P (class1))
14276 return ix86_cost->fp_move;
14277 if (MAYBE_SSE_CLASS_P (class1))
14278 return ix86_cost->sse_move;
14279 if (MAYBE_MMX_CLASS_P (class1))
14280 return ix86_cost->mmx_move;
14284 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14286 ix86_hard_regno_mode_ok (regno, mode)
14288 enum machine_mode mode;
14290 /* Flags and only flags can only hold CCmode values. */
14291 if (CC_REGNO_P (regno))
14292 return GET_MODE_CLASS (mode) == MODE_CC;
14293 if (GET_MODE_CLASS (mode) == MODE_CC
14294 || GET_MODE_CLASS (mode) == MODE_RANDOM
14295 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14297 if (FP_REGNO_P (regno))
14298 return VALID_FP_MODE_P (mode);
14299 if (SSE_REGNO_P (regno))
14300 return VALID_SSE_REG_MODE (mode);
14301 if (MMX_REGNO_P (regno))
14302 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14303 /* We handle both integer and floats in the general purpose registers.
14304 In future we should be able to handle vector modes as well. */
14305 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14307 /* Take care for QImode values - they can be in non-QI regs, but then
14308 they do cause partial register stalls. */
14309 if (regno < 4 || mode != QImode || TARGET_64BIT)
14311 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14314 /* Return the cost of moving data of mode M between a
14315 register and memory. A value of 2 is the default; this cost is
14316 relative to those in `REGISTER_MOVE_COST'.
14318 If moving between registers and memory is more expensive than
14319 between two registers, you should define this macro to express the
14322 Model also increased moving costs of QImode registers in non
14326 ix86_memory_move_cost (mode, class, in)
14327 enum machine_mode mode;
14328 enum reg_class class;
14331 if (FLOAT_CLASS_P (class))
14349 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14351 if (SSE_CLASS_P (class))
14354 switch (GET_MODE_SIZE (mode))
14368 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14370 if (MMX_CLASS_P (class))
14373 switch (GET_MODE_SIZE (mode))
14384 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14386 switch (GET_MODE_SIZE (mode))
14390 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14391 : ix86_cost->movzbl_load);
14393 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14394 : ix86_cost->int_store[0] + 4);
14397 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14399 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14400 if (mode == TFmode)
14402 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14403 * ((int) GET_MODE_SIZE (mode)
14404 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14408 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14410 ix86_svr3_asm_out_constructor (symbol, priority)
14412 int priority ATTRIBUTE_UNUSED;
14415 fputs ("\tpushl $", asm_out_file);
14416 assemble_name (asm_out_file, XSTR (symbol, 0));
14417 fputc ('\n', asm_out_file);
14423 static int current_machopic_label_num;
14425 /* Given a symbol name and its associated stub, write out the
14426 definition of the stub. */
14429 machopic_output_stub (file, symb, stub)
14431 const char *symb, *stub;
14433 unsigned int length;
14434 char *binder_name, *symbol_name, lazy_ptr_name[32];
14435 int label = ++current_machopic_label_num;
14437 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14438 symb = (*targetm.strip_name_encoding) (symb);
14440 length = strlen (stub);
14441 binder_name = alloca (length + 32);
14442 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14444 length = strlen (symb);
14445 symbol_name = alloca (length + 32);
14446 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14448 sprintf (lazy_ptr_name, "L%d$lz", label);
14451 machopic_picsymbol_stub_section ();
14453 machopic_symbol_stub_section ();
14455 fprintf (file, "%s:\n", stub);
14456 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14460 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14461 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14462 fprintf (file, "\tjmp %%edx\n");
14465 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14467 fprintf (file, "%s:\n", binder_name);
14471 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14472 fprintf (file, "\tpushl %%eax\n");
14475 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14477 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14479 machopic_lazy_symbol_ptr_section ();
14480 fprintf (file, "%s:\n", lazy_ptr_name);
14481 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14482 fprintf (file, "\t.long %s\n", binder_name);
14484 #endif /* TARGET_MACHO */
14486 /* Order the registers for register allocator. */
14489 x86_order_regs_for_local_alloc ()
14494 /* First allocate the local general purpose registers. */
14495 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14496 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14497 reg_alloc_order [pos++] = i;
14499 /* Global general purpose registers. */
14500 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14501 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14502 reg_alloc_order [pos++] = i;
14504 /* x87 registers come first in case we are doing FP math
14506 if (!TARGET_SSE_MATH)
14507 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14508 reg_alloc_order [pos++] = i;
14510 /* SSE registers. */
14511 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14512 reg_alloc_order [pos++] = i;
14513 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14514 reg_alloc_order [pos++] = i;
14516 /* x87 registers. */
14517 if (TARGET_SSE_MATH)
14518 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14519 reg_alloc_order [pos++] = i;
14521 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14522 reg_alloc_order [pos++] = i;
14524 /* Initialize the rest of array as we do not allocate some registers
14526 while (pos < FIRST_PSEUDO_REGISTER)
14527 reg_alloc_order [pos++] = 0;
14530 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14531 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14535 ix86_ms_bitfield_layout_p (record_type)
14536 tree record_type ATTRIBUTE_UNUSED;
14538 return TARGET_USE_MS_BITFIELD_LAYOUT;
14541 /* Returns an expression indicating where the this parameter is
14542 located on entry to the FUNCTION. */
14545 x86_this_parameter (function)
14548 tree type = TREE_TYPE (function);
14552 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14553 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14556 if (ix86_fntype_regparm (type) > 0)
14560 parm = TYPE_ARG_TYPES (type);
14561 /* Figure out whether or not the function has a variable number of
14563 for (; parm; parm = TREE_CHAIN (parm))
14564 if (TREE_VALUE (parm) == void_type_node)
14566 /* If not, the this parameter is in %eax. */
14568 return gen_rtx_REG (SImode, 0);
14571 if (aggregate_value_p (TREE_TYPE (type)))
14572 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14574 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14577 /* Determine whether x86_output_mi_thunk can succeed. */
14580 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14581 tree thunk ATTRIBUTE_UNUSED;
14582 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14583 HOST_WIDE_INT vcall_offset;
14586 /* 64-bit can handle anything. */
14590 /* For 32-bit, everything's fine if we have one free register. */
14591 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14594 /* Need a free register for vcall_offset. */
14598 /* Need a free register for GOT references. */
14599 if (flag_pic && !(*targetm.binds_local_p) (function))
14602 /* Otherwise ok. */
14606 /* Output the assembler code for a thunk function. THUNK_DECL is the
14607 declaration for the thunk function itself, FUNCTION is the decl for
14608 the target function. DELTA is an immediate constant offset to be
14609 added to THIS. If VCALL_OFFSET is nonzero, the word at
14610 *(*this + vcall_offset) should be added to THIS. */
14613 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14614 FILE *file ATTRIBUTE_UNUSED;
14615 tree thunk ATTRIBUTE_UNUSED;
14616 HOST_WIDE_INT delta;
14617 HOST_WIDE_INT vcall_offset;
14621 rtx this = x86_this_parameter (function);
14624 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14625 pull it in now and let DELTA benefit. */
14628 else if (vcall_offset)
14630 /* Put the this parameter into %eax. */
14632 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14633 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14636 this_reg = NULL_RTX;
14638 /* Adjust the this parameter by a fixed constant. */
14641 xops[0] = GEN_INT (delta);
14642 xops[1] = this_reg ? this_reg : this;
14645 if (!x86_64_general_operand (xops[0], DImode))
14647 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14649 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14653 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14656 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14659 /* Adjust the this parameter by a value stored in the vtable. */
14663 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14665 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14667 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14670 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14672 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14674 /* Adjust the this parameter. */
14675 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14676 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14678 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14679 xops[0] = GEN_INT (vcall_offset);
14681 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14682 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14684 xops[1] = this_reg;
14686 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14688 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14691 /* If necessary, drop THIS back to its stack slot. */
14692 if (this_reg && this_reg != this)
14694 xops[0] = this_reg;
14696 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14699 xops[0] = DECL_RTL (function);
14702 if (!flag_pic || (*targetm.binds_local_p) (function))
14703 output_asm_insn ("jmp\t%P0", xops);
14706 tmp = XEXP (xops[0], 0);
14707 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14708 tmp = gen_rtx_CONST (Pmode, tmp);
14709 tmp = gen_rtx_MEM (QImode, tmp);
14711 output_asm_insn ("jmp\t%A0", xops);
14716 if (!flag_pic || (*targetm.binds_local_p) (function))
14717 output_asm_insn ("jmp\t%P0", xops);
14720 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14721 output_set_got (tmp);
14724 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14725 output_asm_insn ("jmp\t{*}%1", xops);
14731 x86_field_alignment (field, computed)
14735 enum machine_mode mode;
14736 tree type = TREE_TYPE (field);
14738 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14740 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14741 ? get_inner_array_type (type) : type);
14742 if (mode == DFmode || mode == DCmode
14743 || GET_MODE_CLASS (mode) == MODE_INT
14744 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14745 return MIN (32, computed);
14749 /* Output assembler code to FILE to increment profiler label # LABELNO
14750 for profiling a function entry. */
14752 x86_function_profiler (file, labelno)
14754 int labelno ATTRIBUTE_UNUSED;
14759 #ifndef NO_PROFILE_COUNTERS
14760 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14762 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14766 #ifndef NO_PROFILE_COUNTERS
14767 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14769 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14773 #ifndef NO_PROFILE_COUNTERS
14774 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14775 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14777 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14781 #ifndef NO_PROFILE_COUNTERS
14782 fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno,
14783 PROFILE_COUNT_REGISTER);
14785 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14789 /* Implement machine specific optimizations.
14790 At the moment we implement single transformation: AMD Athlon works faster
14791 when RET is not destination of conditional jump or directly preceded
14792 by other jump instruction. We avoid the penalty by inserting NOP just
14793 before the RET instructions in such cases. */
14795 x86_machine_dependent_reorg (first)
14796 rtx first ATTRIBUTE_UNUSED;
14800 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
14802 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14804 basic_block bb = e->src;
14807 bool insert = false;
14809 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14811 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14812 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14814 if (prev && GET_CODE (prev) == CODE_LABEL)
14817 for (e = bb->pred; e; e = e->pred_next)
14818 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14819 && !(e->flags & EDGE_FALLTHRU))
14824 prev = prev_active_insn (ret);
14825 if (prev && GET_CODE (prev) == JUMP_INSN
14826 && any_condjump_p (prev))
14828 /* Empty functions get branch misspredict even when the jump destination
14829 is not visible to us. */
14830 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14834 emit_insn_before (gen_nop (), ret);
14838 /* Return nonzero when QImode register that must be represented via REX prefix
14841 x86_extended_QIreg_mentioned_p (insn)
14845 extract_insn_cached (insn);
14846 for (i = 0; i < recog_data.n_operands; i++)
14847 if (REG_P (recog_data.operand[i])
14848 && REGNO (recog_data.operand[i]) >= 4)
14853 /* Return nonzero when P points to register encoded via REX prefix.
14854 Called via for_each_rtx. */
14856 extended_reg_mentioned_1 (p, data)
14858 void *data ATTRIBUTE_UNUSED;
14860 unsigned int regno;
14863 regno = REGNO (*p);
14864 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14867 /* Return true when INSN mentions register that must be encoded using REX
14870 x86_extended_reg_mentioned_p (insn)
14873 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14876 #include "gt-i386.h"