1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Processor costs (relative to an add) */
55 struct processor_costs size_cost = { /* costs for tunning for size */
56 2, /* cost of an add instruction */
57 3, /* cost of a lea instruction */
58 2, /* variable shift costs */
59 3, /* constant shift costs */
60 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
61 0, /* cost of multiply per each bit set */
62 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
63 3, /* cost of movsx */
64 3, /* cost of movzx */
67 2, /* cost for loading QImode using movzbl */
68 {2, 2, 2}, /* cost of loading integer registers
69 in QImode, HImode and SImode.
70 Relative to reg-reg move (2). */
71 {2, 2, 2}, /* cost of storing integer registers */
72 2, /* cost of reg,reg fld/fst */
73 {2, 2, 2}, /* cost of loading fp registers
74 in SFmode, DFmode and XFmode */
75 {2, 2, 2}, /* cost of loading integer registers */
76 3, /* cost of moving MMX register */
77 {3, 3}, /* cost of loading MMX registers
78 in SImode and DImode */
79 {3, 3}, /* cost of storing MMX registers
80 in SImode and DImode */
81 3, /* cost of moving SSE register */
82 {3, 3, 3}, /* cost of loading SSE registers
83 in SImode, DImode and TImode */
84 {3, 3, 3}, /* cost of storing SSE registers
85 in SImode, DImode and TImode */
86 3, /* MMX or SSE register to integer */
87 0, /* size of prefetch block */
88 0, /* number of parallel prefetches */
90 2, /* cost of FADD and FSUB insns. */
91 2, /* cost of FMUL instruction. */
92 2, /* cost of FDIV instruction. */
93 2, /* cost of FABS instruction. */
94 2, /* cost of FCHS instruction. */
95 2, /* cost of FSQRT instruction. */
98 /* Processor costs (relative to an add) */
100 struct processor_costs i386_cost = { /* 386 specific costs */
101 1, /* cost of an add instruction */
102 1, /* cost of a lea instruction */
103 3, /* variable shift costs */
104 2, /* constant shift costs */
105 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
106 1, /* cost of multiply per each bit set */
107 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
108 3, /* cost of movsx */
109 2, /* cost of movzx */
110 15, /* "large" insn */
112 4, /* cost for loading QImode using movzbl */
113 {2, 4, 2}, /* cost of loading integer registers
114 in QImode, HImode and SImode.
115 Relative to reg-reg move (2). */
116 {2, 4, 2}, /* cost of storing integer registers */
117 2, /* cost of reg,reg fld/fst */
118 {8, 8, 8}, /* cost of loading fp registers
119 in SFmode, DFmode and XFmode */
120 {8, 8, 8}, /* cost of loading integer registers */
121 2, /* cost of moving MMX register */
122 {4, 8}, /* cost of loading MMX registers
123 in SImode and DImode */
124 {4, 8}, /* cost of storing MMX registers
125 in SImode and DImode */
126 2, /* cost of moving SSE register */
127 {4, 8, 16}, /* cost of loading SSE registers
128 in SImode, DImode and TImode */
129 {4, 8, 16}, /* cost of storing SSE registers
130 in SImode, DImode and TImode */
131 3, /* MMX or SSE register to integer */
132 0, /* size of prefetch block */
133 0, /* number of parallel prefetches */
135 23, /* cost of FADD and FSUB insns. */
136 27, /* cost of FMUL instruction. */
137 88, /* cost of FDIV instruction. */
138 22, /* cost of FABS instruction. */
139 24, /* cost of FCHS instruction. */
140 122, /* cost of FSQRT instruction. */
144 struct processor_costs i486_cost = { /* 486 specific costs */
145 1, /* cost of an add instruction */
146 1, /* cost of a lea instruction */
147 3, /* variable shift costs */
148 2, /* constant shift costs */
149 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
150 1, /* cost of multiply per each bit set */
151 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
152 3, /* cost of movsx */
153 2, /* cost of movzx */
154 15, /* "large" insn */
156 4, /* cost for loading QImode using movzbl */
157 {2, 4, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 4, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {8, 8, 8}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {8, 8, 8}, /* cost of loading integer registers */
165 2, /* cost of moving MMX register */
166 {4, 8}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {4, 8}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {4, 8, 16}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {4, 8, 16}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3, /* MMX or SSE register to integer */
176 0, /* size of prefetch block */
177 0, /* number of parallel prefetches */
179 8, /* cost of FADD and FSUB insns. */
180 16, /* cost of FMUL instruction. */
181 73, /* cost of FDIV instruction. */
182 3, /* cost of FABS instruction. */
183 3, /* cost of FCHS instruction. */
184 83, /* cost of FSQRT instruction. */
188 struct processor_costs pentium_cost = {
189 1, /* cost of an add instruction */
190 1, /* cost of a lea instruction */
191 4, /* variable shift costs */
192 1, /* constant shift costs */
193 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
194 0, /* cost of multiply per each bit set */
195 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
196 3, /* cost of movsx */
197 2, /* cost of movzx */
198 8, /* "large" insn */
200 6, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {2, 2, 6}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {4, 4, 6}, /* cost of loading integer registers */
209 8, /* cost of moving MMX register */
210 {8, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {8, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
223 3, /* cost of FADD and FSUB insns. */
224 3, /* cost of FMUL instruction. */
225 39, /* cost of FDIV instruction. */
226 1, /* cost of FABS instruction. */
227 1, /* cost of FCHS instruction. */
228 70, /* cost of FSQRT instruction. */
232 struct processor_costs pentiumpro_cost = {
233 1, /* cost of an add instruction */
234 1, /* cost of a lea instruction */
235 1, /* variable shift costs */
236 1, /* constant shift costs */
237 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
238 0, /* cost of multiply per each bit set */
239 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
240 1, /* cost of movsx */
241 1, /* cost of movzx */
242 8, /* "large" insn */
244 2, /* cost for loading QImode using movzbl */
245 {4, 4, 4}, /* cost of loading integer registers
246 in QImode, HImode and SImode.
247 Relative to reg-reg move (2). */
248 {2, 2, 2}, /* cost of storing integer registers */
249 2, /* cost of reg,reg fld/fst */
250 {2, 2, 6}, /* cost of loading fp registers
251 in SFmode, DFmode and XFmode */
252 {4, 4, 6}, /* cost of loading integer registers */
253 2, /* cost of moving MMX register */
254 {2, 2}, /* cost of loading MMX registers
255 in SImode and DImode */
256 {2, 2}, /* cost of storing MMX registers
257 in SImode and DImode */
258 2, /* cost of moving SSE register */
259 {2, 2, 8}, /* cost of loading SSE registers
260 in SImode, DImode and TImode */
261 {2, 2, 8}, /* cost of storing SSE registers
262 in SImode, DImode and TImode */
263 3, /* MMX or SSE register to integer */
264 32, /* size of prefetch block */
265 6, /* number of parallel prefetches */
267 3, /* cost of FADD and FSUB insns. */
268 5, /* cost of FMUL instruction. */
269 56, /* cost of FDIV instruction. */
270 2, /* cost of FABS instruction. */
271 2, /* cost of FCHS instruction. */
272 56, /* cost of FSQRT instruction. */
276 struct processor_costs k6_cost = {
277 1, /* cost of an add instruction */
278 2, /* cost of a lea instruction */
279 1, /* variable shift costs */
280 1, /* constant shift costs */
281 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
282 0, /* cost of multiply per each bit set */
283 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
284 2, /* cost of movsx */
285 2, /* cost of movzx */
286 8, /* "large" insn */
288 3, /* cost for loading QImode using movzbl */
289 {4, 5, 4}, /* cost of loading integer registers
290 in QImode, HImode and SImode.
291 Relative to reg-reg move (2). */
292 {2, 3, 2}, /* cost of storing integer registers */
293 4, /* cost of reg,reg fld/fst */
294 {6, 6, 6}, /* cost of loading fp registers
295 in SFmode, DFmode and XFmode */
296 {4, 4, 4}, /* cost of loading integer registers */
297 2, /* cost of moving MMX register */
298 {2, 2}, /* cost of loading MMX registers
299 in SImode and DImode */
300 {2, 2}, /* cost of storing MMX registers
301 in SImode and DImode */
302 2, /* cost of moving SSE register */
303 {2, 2, 8}, /* cost of loading SSE registers
304 in SImode, DImode and TImode */
305 {2, 2, 8}, /* cost of storing SSE registers
306 in SImode, DImode and TImode */
307 6, /* MMX or SSE register to integer */
308 32, /* size of prefetch block */
309 1, /* number of parallel prefetches */
311 2, /* cost of FADD and FSUB insns. */
312 2, /* cost of FMUL instruction. */
313 56, /* cost of FDIV instruction. */
314 2, /* cost of FABS instruction. */
315 2, /* cost of FCHS instruction. */
316 56, /* cost of FSQRT instruction. */
320 struct processor_costs athlon_cost = {
321 1, /* cost of an add instruction */
322 2, /* cost of a lea instruction */
323 1, /* variable shift costs */
324 1, /* constant shift costs */
325 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
326 0, /* cost of multiply per each bit set */
327 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
328 1, /* cost of movsx */
329 1, /* cost of movzx */
330 8, /* "large" insn */
332 4, /* cost for loading QImode using movzbl */
333 {3, 4, 3}, /* cost of loading integer registers
334 in QImode, HImode and SImode.
335 Relative to reg-reg move (2). */
336 {3, 4, 3}, /* cost of storing integer registers */
337 4, /* cost of reg,reg fld/fst */
338 {4, 4, 12}, /* cost of loading fp registers
339 in SFmode, DFmode and XFmode */
340 {6, 6, 8}, /* cost of loading integer registers */
341 2, /* cost of moving MMX register */
342 {4, 4}, /* cost of loading MMX registers
343 in SImode and DImode */
344 {4, 4}, /* cost of storing MMX registers
345 in SImode and DImode */
346 2, /* cost of moving SSE register */
347 {4, 4, 6}, /* cost of loading SSE registers
348 in SImode, DImode and TImode */
349 {4, 4, 5}, /* cost of storing SSE registers
350 in SImode, DImode and TImode */
351 5, /* MMX or SSE register to integer */
352 64, /* size of prefetch block */
353 6, /* number of parallel prefetches */
355 4, /* cost of FADD and FSUB insns. */
356 4, /* cost of FMUL instruction. */
357 24, /* cost of FDIV instruction. */
358 2, /* cost of FABS instruction. */
359 2, /* cost of FCHS instruction. */
360 35, /* cost of FSQRT instruction. */
364 struct processor_costs k8_cost = {
365 1, /* cost of an add instruction */
366 2, /* cost of a lea instruction */
367 1, /* variable shift costs */
368 1, /* constant shift costs */
369 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
370 0, /* cost of multiply per each bit set */
371 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
372 1, /* cost of movsx */
373 1, /* cost of movzx */
374 8, /* "large" insn */
376 4, /* cost for loading QImode using movzbl */
377 {3, 4, 3}, /* cost of loading integer registers
378 in QImode, HImode and SImode.
379 Relative to reg-reg move (2). */
380 {3, 4, 3}, /* cost of storing integer registers */
381 4, /* cost of reg,reg fld/fst */
382 {4, 4, 12}, /* cost of loading fp registers
383 in SFmode, DFmode and XFmode */
384 {6, 6, 8}, /* cost of loading integer registers */
385 2, /* cost of moving MMX register */
386 {3, 3}, /* cost of loading MMX registers
387 in SImode and DImode */
388 {4, 4}, /* cost of storing MMX registers
389 in SImode and DImode */
390 2, /* cost of moving SSE register */
391 {4, 3, 6}, /* cost of loading SSE registers
392 in SImode, DImode and TImode */
393 {4, 4, 5}, /* cost of storing SSE registers
394 in SImode, DImode and TImode */
395 5, /* MMX or SSE register to integer */
396 64, /* size of prefetch block */
397 6, /* number of parallel prefetches */
399 4, /* cost of FADD and FSUB insns. */
400 4, /* cost of FMUL instruction. */
401 19, /* cost of FDIV instruction. */
402 2, /* cost of FABS instruction. */
403 2, /* cost of FCHS instruction. */
404 35, /* cost of FSQRT instruction. */
408 struct processor_costs pentium4_cost = {
409 1, /* cost of an add instruction */
410 1, /* cost of a lea instruction */
411 4, /* variable shift costs */
412 4, /* constant shift costs */
413 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
414 0, /* cost of multiply per each bit set */
415 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
416 1, /* cost of movsx */
417 1, /* cost of movzx */
418 16, /* "large" insn */
420 2, /* cost for loading QImode using movzbl */
421 {4, 5, 4}, /* cost of loading integer registers
422 in QImode, HImode and SImode.
423 Relative to reg-reg move (2). */
424 {2, 3, 2}, /* cost of storing integer registers */
425 2, /* cost of reg,reg fld/fst */
426 {2, 2, 6}, /* cost of loading fp registers
427 in SFmode, DFmode and XFmode */
428 {4, 4, 6}, /* cost of loading integer registers */
429 2, /* cost of moving MMX register */
430 {2, 2}, /* cost of loading MMX registers
431 in SImode and DImode */
432 {2, 2}, /* cost of storing MMX registers
433 in SImode and DImode */
434 12, /* cost of moving SSE register */
435 {12, 12, 12}, /* cost of loading SSE registers
436 in SImode, DImode and TImode */
437 {2, 2, 8}, /* cost of storing SSE registers
438 in SImode, DImode and TImode */
439 10, /* MMX or SSE register to integer */
440 64, /* size of prefetch block */
441 6, /* number of parallel prefetches */
443 5, /* cost of FADD and FSUB insns. */
444 7, /* cost of FMUL instruction. */
445 43, /* cost of FDIV instruction. */
446 2, /* cost of FABS instruction. */
447 2, /* cost of FCHS instruction. */
448 43, /* cost of FSQRT instruction. */
451 const struct processor_costs *ix86_cost = &pentium_cost;
453 /* Processor feature/optimization bitmasks. */
454 #define m_386 (1<<PROCESSOR_I386)
455 #define m_486 (1<<PROCESSOR_I486)
456 #define m_PENT (1<<PROCESSOR_PENTIUM)
457 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
458 #define m_K6 (1<<PROCESSOR_K6)
459 #define m_ATHLON (1<<PROCESSOR_ATHLON)
460 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
461 #define m_K8 (1<<PROCESSOR_K8)
462 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
464 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
465 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
466 const int x86_zero_extend_with_and = m_486 | m_PENT;
467 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
468 const int x86_double_with_add = ~m_386;
469 const int x86_use_bit_test = m_386;
470 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
471 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
472 const int x86_3dnow_a = m_ATHLON_K8;
473 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_branch_hints = m_PENT4;
475 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
476 const int x86_partial_reg_stall = m_PPRO;
477 const int x86_use_loop = m_K6;
478 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
479 const int x86_use_mov0 = m_K6;
480 const int x86_use_cltd = ~(m_PENT | m_K6);
481 const int x86_read_modify_write = ~m_PENT;
482 const int x86_read_modify = ~(m_PENT | m_PPRO);
483 const int x86_split_long_moves = m_PPRO;
484 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
485 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
486 const int x86_single_stringop = m_386 | m_PENT4;
487 const int x86_qimode_math = ~(0);
488 const int x86_promote_qi_regs = 0;
489 const int x86_himode_math = ~(m_PPRO);
490 const int x86_promote_hi_regs = m_PPRO;
491 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
492 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
493 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
494 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
495 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
496 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
497 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
498 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
499 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
500 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
501 const int x86_decompose_lea = m_PENT4;
502 const int x86_shift1 = ~m_486;
503 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
504 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
505 /* Set for machines where the type and dependencies are resolved on SSE register
506 parts instead of whole registers, so we may maintain just lower part of
507 scalar values in proper format leaving the upper part undefined. */
508 const int x86_sse_partial_regs = m_ATHLON_K8;
509 /* Athlon optimizes partial-register FPS special case, thus avoiding the
510 need for extra instructions beforehand */
511 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
512 const int x86_sse_typeless_stores = m_ATHLON_K8;
513 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
514 const int x86_use_ffreep = m_ATHLON_K8;
515 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
517 /* In case the average insn count for single function invocation is
518 lower than this constant, emit fast (but longer) prologue and
520 #define FAST_PROLOGUE_INSN_COUNT 20
522 /* Set by prologue expander and used by epilogue expander to determine
524 static int use_fast_prologue_epilogue;
526 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
527 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
528 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
529 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
531 /* Array of the smallest class containing reg number REGNO, indexed by
532 REGNO. Used by REGNO_REG_CLASS in i386.h. */
534 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
537 AREG, DREG, CREG, BREG,
539 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
541 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
542 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
545 /* flags, fpsr, dirflag, frame */
546 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
547 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
549 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
551 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
552 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
553 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
557 /* The "default" register map used in 32bit mode. */
559 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
561 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
562 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
563 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
564 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
565 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
566 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
567 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
570 static int const x86_64_int_parameter_registers[6] =
572 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
573 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
576 static int const x86_64_int_return_registers[4] =
578 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
581 /* The "default" register map used in 64bit mode. */
582 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
584 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
585 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
586 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
587 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
588 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
589 8,9,10,11,12,13,14,15, /* extended integer registers */
590 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
593 /* Define the register numbers to be used in Dwarf debugging information.
594 The SVR4 reference port C compiler uses the following register numbers
595 in its Dwarf output code:
596 0 for %eax (gcc regno = 0)
597 1 for %ecx (gcc regno = 2)
598 2 for %edx (gcc regno = 1)
599 3 for %ebx (gcc regno = 3)
600 4 for %esp (gcc regno = 7)
601 5 for %ebp (gcc regno = 6)
602 6 for %esi (gcc regno = 4)
603 7 for %edi (gcc regno = 5)
604 The following three DWARF register numbers are never generated by
605 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
606 believes these numbers have these meanings.
607 8 for %eip (no gcc equivalent)
608 9 for %eflags (gcc regno = 17)
609 10 for %trapno (no gcc equivalent)
610 It is not at all clear how we should number the FP stack registers
611 for the x86 architecture. If the version of SDB on x86/svr4 were
612 a bit less brain dead with respect to floating-point then we would
613 have a precedent to follow with respect to DWARF register numbers
614 for x86 FP registers, but the SDB on x86/svr4 is so completely
615 broken with respect to FP registers that it is hardly worth thinking
616 of it as something to strive for compatibility with.
617 The version of x86/svr4 SDB I have at the moment does (partially)
618 seem to believe that DWARF register number 11 is associated with
619 the x86 register %st(0), but that's about all. Higher DWARF
620 register numbers don't seem to be associated with anything in
621 particular, and even for DWARF regno 11, SDB only seems to under-
622 stand that it should say that a variable lives in %st(0) (when
623 asked via an `=' command) if we said it was in DWARF regno 11,
624 but SDB still prints garbage when asked for the value of the
625 variable in question (via a `/' command).
626 (Also note that the labels SDB prints for various FP stack regs
627 when doing an `x' command are all wrong.)
628 Note that these problems generally don't affect the native SVR4
629 C compiler because it doesn't allow the use of -O with -g and
630 because when it is *not* optimizing, it allocates a memory
631 location for each floating-point variable, and the memory
632 location is what gets described in the DWARF AT_location
633 attribute for the variable in question.
634 Regardless of the severe mental illness of the x86/svr4 SDB, we
635 do something sensible here and we use the following DWARF
636 register numbers. Note that these are all stack-top-relative
638 11 for %st(0) (gcc regno = 8)
639 12 for %st(1) (gcc regno = 9)
640 13 for %st(2) (gcc regno = 10)
641 14 for %st(3) (gcc regno = 11)
642 15 for %st(4) (gcc regno = 12)
643 16 for %st(5) (gcc regno = 13)
644 17 for %st(6) (gcc regno = 14)
645 18 for %st(7) (gcc regno = 15)
647 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
649 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
650 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
651 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
652 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
653 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
654 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
655 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
658 /* Test and compare insns in i386.md store the information needed to
659 generate branch and scc insns here. */
661 rtx ix86_compare_op0 = NULL_RTX;
662 rtx ix86_compare_op1 = NULL_RTX;
664 /* The encoding characters for the four TLS models present in ELF. */
666 static char const tls_model_chars[] = " GLil";
668 #define MAX_386_STACK_LOCALS 3
669 /* Size of the register save area. */
670 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
672 /* Define the structure for the machine field in struct function. */
673 struct machine_function GTY(())
675 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
676 const char *some_ld_name;
677 int save_varrargs_registers;
678 int accesses_prev_frame;
681 #define ix86_stack_locals (cfun->machine->stack_locals)
682 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
684 /* Structure describing stack frame layout.
685 Stack grows downward:
691 saved frame pointer if frame_pointer_needed
692 <- HARD_FRAME_POINTER
698 > to_allocate <- FRAME_POINTER
710 int outgoing_arguments_size;
713 HOST_WIDE_INT to_allocate;
714 /* The offsets relative to ARG_POINTER. */
715 HOST_WIDE_INT frame_pointer_offset;
716 HOST_WIDE_INT hard_frame_pointer_offset;
717 HOST_WIDE_INT stack_pointer_offset;
720 /* Used to enable/disable debugging features. */
721 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
722 /* Code model option as passed by user. */
723 const char *ix86_cmodel_string;
725 enum cmodel ix86_cmodel;
727 const char *ix86_asm_string;
728 enum asm_dialect ix86_asm_dialect = ASM_ATT;
730 const char *ix86_tls_dialect_string;
731 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
733 /* Which unit we are generating floating point math for. */
734 enum fpmath_unit ix86_fpmath;
736 /* Which cpu are we scheduling for. */
737 enum processor_type ix86_cpu;
738 /* Which instruction set architecture to use. */
739 enum processor_type ix86_arch;
741 /* Strings to hold which cpu and instruction set architecture to use. */
742 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
743 const char *ix86_arch_string; /* for -march=<xxx> */
744 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
746 /* # of registers to use to pass arguments. */
747 const char *ix86_regparm_string;
749 /* true if sse prefetch instruction is not NOOP. */
750 int x86_prefetch_sse;
752 /* ix86_regparm_string as a number */
755 /* Alignment to use for loops and jumps: */
757 /* Power of two alignment for loops. */
758 const char *ix86_align_loops_string;
760 /* Power of two alignment for non-loop jumps. */
761 const char *ix86_align_jumps_string;
763 /* Power of two alignment for stack boundary in bytes. */
764 const char *ix86_preferred_stack_boundary_string;
766 /* Preferred alignment for stack boundary in bits. */
767 int ix86_preferred_stack_boundary;
769 /* Values 1-5: see jump.c */
770 int ix86_branch_cost;
771 const char *ix86_branch_cost_string;
773 /* Power of two alignment for functions. */
774 const char *ix86_align_funcs_string;
776 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
777 static char internal_label_prefix[16];
778 static int internal_label_prefix_len;
780 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
781 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
782 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
783 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
785 static const char *get_some_local_dynamic_name PARAMS ((void));
786 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
787 static rtx maybe_get_pool_constant PARAMS ((rtx));
788 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
789 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
791 static rtx get_thread_pointer PARAMS ((void));
792 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
793 static rtx gen_push PARAMS ((rtx));
794 static int memory_address_length PARAMS ((rtx addr));
795 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
796 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
797 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
798 static void ix86_dump_ppro_packet PARAMS ((FILE *));
799 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
800 static struct machine_function * ix86_init_machine_status PARAMS ((void));
801 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
802 static int ix86_nsaved_regs PARAMS ((void));
803 static void ix86_emit_save_regs PARAMS ((void));
804 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
805 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
806 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
807 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
808 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
809 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
810 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
811 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
812 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
813 static int ix86_issue_rate PARAMS ((void));
814 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
815 static void ix86_sched_init PARAMS ((FILE *, int, int));
816 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
817 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
818 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
819 static int ia32_multipass_dfa_lookahead PARAMS ((void));
820 static void ix86_init_mmx_sse_builtins PARAMS ((void));
821 static rtx x86_this_parameter PARAMS ((tree));
822 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
823 HOST_WIDE_INT, tree));
824 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
825 HOST_WIDE_INT, tree));
826 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
830 rtx base, index, disp;
834 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
835 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
837 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
838 static const char *ix86_strip_name_encoding PARAMS ((const char *))
841 struct builtin_description;
842 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
844 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
846 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
847 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
848 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
849 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
850 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
851 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
852 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
856 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
858 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
859 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
860 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
861 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
862 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
863 static int ix86_save_reg PARAMS ((unsigned int, int));
864 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
865 static int ix86_comp_type_attributes PARAMS ((tree, tree));
866 static int ix86_fntype_regparm PARAMS ((tree));
867 const struct attribute_spec ix86_attribute_table[];
868 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
869 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
870 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
871 static int ix86_value_regno PARAMS ((enum machine_mode));
872 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
873 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
874 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
876 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
877 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
880 /* Register class used for passing given 64bit part of the argument.
881 These represent classes as documented by the PS ABI, with the exception
882 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
883 use SF or DFmode move instead of DImode to avoid reformatting penalties.
885 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
886 whenever possible (upper half does contain padding).
888 enum x86_64_reg_class
891 X86_64_INTEGER_CLASS,
892 X86_64_INTEGERSI_CLASS,
901 static const char * const x86_64_reg_class_name[] =
902 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
904 #define MAX_CLASSES 4
905 static int classify_argument PARAMS ((enum machine_mode, tree,
906 enum x86_64_reg_class [MAX_CLASSES],
908 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
910 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
912 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
913 enum x86_64_reg_class));
915 /* Initialize the GCC target structure. */
916 #undef TARGET_ATTRIBUTE_TABLE
917 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
918 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
919 # undef TARGET_MERGE_DECL_ATTRIBUTES
920 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
923 #undef TARGET_COMP_TYPE_ATTRIBUTES
924 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
926 #undef TARGET_INIT_BUILTINS
927 #define TARGET_INIT_BUILTINS ix86_init_builtins
929 #undef TARGET_EXPAND_BUILTIN
930 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
932 #undef TARGET_ASM_FUNCTION_EPILOGUE
933 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
935 #undef TARGET_ASM_OPEN_PAREN
936 #define TARGET_ASM_OPEN_PAREN ""
937 #undef TARGET_ASM_CLOSE_PAREN
938 #define TARGET_ASM_CLOSE_PAREN ""
940 #undef TARGET_ASM_ALIGNED_HI_OP
941 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
942 #undef TARGET_ASM_ALIGNED_SI_OP
943 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
945 #undef TARGET_ASM_ALIGNED_DI_OP
946 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
949 #undef TARGET_ASM_UNALIGNED_HI_OP
950 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
951 #undef TARGET_ASM_UNALIGNED_SI_OP
952 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
953 #undef TARGET_ASM_UNALIGNED_DI_OP
954 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
956 #undef TARGET_SCHED_ADJUST_COST
957 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
958 #undef TARGET_SCHED_ISSUE_RATE
959 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
960 #undef TARGET_SCHED_VARIABLE_ISSUE
961 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
962 #undef TARGET_SCHED_INIT
963 #define TARGET_SCHED_INIT ix86_sched_init
964 #undef TARGET_SCHED_REORDER
965 #define TARGET_SCHED_REORDER ix86_sched_reorder
966 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
967 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
968 ia32_use_dfa_pipeline_interface
969 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
970 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
971 ia32_multipass_dfa_lookahead
973 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
974 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
977 #undef TARGET_HAVE_TLS
978 #define TARGET_HAVE_TLS true
980 #undef TARGET_CANNOT_FORCE_CONST_MEM
981 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
983 #undef TARGET_MS_BITFIELD_LAYOUT_P
984 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
986 #undef TARGET_ASM_OUTPUT_MI_THUNK
987 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
988 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
989 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
991 struct gcc_target targetm = TARGET_INITIALIZER;
993 /* Sometimes certain combinations of command options do not make
994 sense on a particular target machine. You can define a macro
995 `OVERRIDE_OPTIONS' to take account of this. This macro, if
996 defined, is executed once just after all the command options have
999 Don't use this macro to turn on various extra optimizations for
1000 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1006 /* Comes from final.c -- no real reason to change it. */
1007 #define MAX_CODE_ALIGN 16
1011 const struct processor_costs *cost; /* Processor costs */
1012 const int target_enable; /* Target flags to enable. */
1013 const int target_disable; /* Target flags to disable. */
1014 const int align_loop; /* Default alignments. */
1015 const int align_loop_max_skip;
1016 const int align_jump;
1017 const int align_jump_max_skip;
1018 const int align_func;
1020 const processor_target_table[PROCESSOR_max] =
1022 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1023 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1024 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1025 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1026 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1027 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1028 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1029 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1032 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1035 const char *const name; /* processor name or nickname. */
1036 const enum processor_type processor;
1037 const enum pta_flags
1042 PTA_PREFETCH_SSE = 8,
1048 const processor_alias_table[] =
1050 {"i386", PROCESSOR_I386, 0},
1051 {"i486", PROCESSOR_I486, 0},
1052 {"i586", PROCESSOR_PENTIUM, 0},
1053 {"pentium", PROCESSOR_PENTIUM, 0},
1054 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1055 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1056 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1057 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1058 {"i686", PROCESSOR_PENTIUMPRO, 0},
1059 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1060 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1061 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1062 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1063 PTA_MMX | PTA_PREFETCH_SSE},
1064 {"k6", PROCESSOR_K6, PTA_MMX},
1065 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1066 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1067 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1069 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1070 | PTA_3DNOW | PTA_3DNOW_A},
1071 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1072 | PTA_3DNOW_A | PTA_SSE},
1073 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1074 | PTA_3DNOW_A | PTA_SSE},
1075 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1076 | PTA_3DNOW_A | PTA_SSE},
1077 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1078 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1081 int const pta_size = ARRAY_SIZE (processor_alias_table);
1083 /* By default our XFmode is the 80-bit extended format. If we have
1084 use TFmode instead, it's also the 80-bit format, but with padding. */
1085 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1086 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1088 /* Set the default values for switches whose default depends on TARGET_64BIT
1089 in case they weren't overwritten by command line options. */
1092 if (flag_omit_frame_pointer == 2)
1093 flag_omit_frame_pointer = 1;
1094 if (flag_asynchronous_unwind_tables == 2)
1095 flag_asynchronous_unwind_tables = 1;
1096 if (flag_pcc_struct_return == 2)
1097 flag_pcc_struct_return = 0;
1101 if (flag_omit_frame_pointer == 2)
1102 flag_omit_frame_pointer = 0;
1103 if (flag_asynchronous_unwind_tables == 2)
1104 flag_asynchronous_unwind_tables = 0;
1105 if (flag_pcc_struct_return == 2)
1106 flag_pcc_struct_return = 1;
1109 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1110 SUBTARGET_OVERRIDE_OPTIONS;
1113 if (!ix86_cpu_string && ix86_arch_string)
1114 ix86_cpu_string = ix86_arch_string;
1115 if (!ix86_cpu_string)
1116 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1117 if (!ix86_arch_string)
1118 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1120 if (ix86_cmodel_string != 0)
1122 if (!strcmp (ix86_cmodel_string, "small"))
1123 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1125 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1126 else if (!strcmp (ix86_cmodel_string, "32"))
1127 ix86_cmodel = CM_32;
1128 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1129 ix86_cmodel = CM_KERNEL;
1130 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1131 ix86_cmodel = CM_MEDIUM;
1132 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1133 ix86_cmodel = CM_LARGE;
1135 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1139 ix86_cmodel = CM_32;
1141 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1143 if (ix86_asm_string != 0)
1145 if (!strcmp (ix86_asm_string, "intel"))
1146 ix86_asm_dialect = ASM_INTEL;
1147 else if (!strcmp (ix86_asm_string, "att"))
1148 ix86_asm_dialect = ASM_ATT;
1150 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1152 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1153 error ("code model `%s' not supported in the %s bit mode",
1154 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1155 if (ix86_cmodel == CM_LARGE)
1156 sorry ("code model `large' not supported yet");
1157 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1158 sorry ("%i-bit mode not compiled in",
1159 (target_flags & MASK_64BIT) ? 64 : 32);
1161 for (i = 0; i < pta_size; i++)
1162 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1164 ix86_arch = processor_alias_table[i].processor;
1165 /* Default cpu tuning to the architecture. */
1166 ix86_cpu = ix86_arch;
1167 if (processor_alias_table[i].flags & PTA_MMX
1168 && !(target_flags_explicit & MASK_MMX))
1169 target_flags |= MASK_MMX;
1170 if (processor_alias_table[i].flags & PTA_3DNOW
1171 && !(target_flags_explicit & MASK_3DNOW))
1172 target_flags |= MASK_3DNOW;
1173 if (processor_alias_table[i].flags & PTA_3DNOW_A
1174 && !(target_flags_explicit & MASK_3DNOW_A))
1175 target_flags |= MASK_3DNOW_A;
1176 if (processor_alias_table[i].flags & PTA_SSE
1177 && !(target_flags_explicit & MASK_SSE))
1178 target_flags |= MASK_SSE;
1179 if (processor_alias_table[i].flags & PTA_SSE2
1180 && !(target_flags_explicit & MASK_SSE2))
1181 target_flags |= MASK_SSE2;
1182 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1183 x86_prefetch_sse = true;
1184 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1185 error ("CPU you selected does not support x86-64 instruction set");
1190 error ("bad value (%s) for -march= switch", ix86_arch_string);
1192 for (i = 0; i < pta_size; i++)
1193 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1195 ix86_cpu = processor_alias_table[i].processor;
1196 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1197 error ("CPU you selected does not support x86-64 instruction set");
1200 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1201 x86_prefetch_sse = true;
1203 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1206 ix86_cost = &size_cost;
1208 ix86_cost = processor_target_table[ix86_cpu].cost;
1209 target_flags |= processor_target_table[ix86_cpu].target_enable;
1210 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1212 /* Arrange to set up i386_stack_locals for all functions. */
1213 init_machine_status = ix86_init_machine_status;
1215 /* Validate -mregparm= value. */
1216 if (ix86_regparm_string)
1218 i = atoi (ix86_regparm_string);
1219 if (i < 0 || i > REGPARM_MAX)
1220 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1226 ix86_regparm = REGPARM_MAX;
1228 /* If the user has provided any of the -malign-* options,
1229 warn and use that value only if -falign-* is not set.
1230 Remove this code in GCC 3.2 or later. */
1231 if (ix86_align_loops_string)
1233 warning ("-malign-loops is obsolete, use -falign-loops");
1234 if (align_loops == 0)
1236 i = atoi (ix86_align_loops_string);
1237 if (i < 0 || i > MAX_CODE_ALIGN)
1238 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1240 align_loops = 1 << i;
1244 if (ix86_align_jumps_string)
1246 warning ("-malign-jumps is obsolete, use -falign-jumps");
1247 if (align_jumps == 0)
1249 i = atoi (ix86_align_jumps_string);
1250 if (i < 0 || i > MAX_CODE_ALIGN)
1251 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1253 align_jumps = 1 << i;
1257 if (ix86_align_funcs_string)
1259 warning ("-malign-functions is obsolete, use -falign-functions");
1260 if (align_functions == 0)
1262 i = atoi (ix86_align_funcs_string);
1263 if (i < 0 || i > MAX_CODE_ALIGN)
1264 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1266 align_functions = 1 << i;
1270 /* Default align_* from the processor table. */
1271 if (align_loops == 0)
1273 align_loops = processor_target_table[ix86_cpu].align_loop;
1274 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1276 if (align_jumps == 0)
1278 align_jumps = processor_target_table[ix86_cpu].align_jump;
1279 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1281 if (align_functions == 0)
1283 align_functions = processor_target_table[ix86_cpu].align_func;
1286 /* Validate -mpreferred-stack-boundary= value, or provide default.
1287 The default of 128 bits is for Pentium III's SSE __m128, but we
1288 don't want additional code to keep the stack aligned when
1289 optimizing for code size. */
1290 ix86_preferred_stack_boundary = (optimize_size
1291 ? TARGET_64BIT ? 128 : 32
1293 if (ix86_preferred_stack_boundary_string)
1295 i = atoi (ix86_preferred_stack_boundary_string);
1296 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1297 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1298 TARGET_64BIT ? 4 : 2);
1300 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1303 /* Validate -mbranch-cost= value, or provide default. */
1304 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1305 if (ix86_branch_cost_string)
1307 i = atoi (ix86_branch_cost_string);
1309 error ("-mbranch-cost=%d is not between 0 and 5", i);
1311 ix86_branch_cost = i;
1314 if (ix86_tls_dialect_string)
1316 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1317 ix86_tls_dialect = TLS_DIALECT_GNU;
1318 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1319 ix86_tls_dialect = TLS_DIALECT_SUN;
1321 error ("bad value (%s) for -mtls-dialect= switch",
1322 ix86_tls_dialect_string);
1325 /* Keep nonleaf frame pointers. */
1326 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1327 flag_omit_frame_pointer = 1;
1329 /* If we're doing fast math, we don't care about comparison order
1330 wrt NaNs. This lets us use a shorter comparison sequence. */
1331 if (flag_unsafe_math_optimizations)
1332 target_flags &= ~MASK_IEEE_FP;
1334 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1335 since the insns won't need emulation. */
1336 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1337 target_flags &= ~MASK_NO_FANCY_MATH_387;
1341 if (TARGET_ALIGN_DOUBLE)
1342 error ("-malign-double makes no sense in the 64bit mode");
1344 error ("-mrtd calling convention not supported in the 64bit mode");
1345 /* Enable by default the SSE and MMX builtins. */
1346 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1347 ix86_fpmath = FPMATH_SSE;
1350 ix86_fpmath = FPMATH_387;
1352 if (ix86_fpmath_string != 0)
1354 if (! strcmp (ix86_fpmath_string, "387"))
1355 ix86_fpmath = FPMATH_387;
1356 else if (! strcmp (ix86_fpmath_string, "sse"))
1360 warning ("SSE instruction set disabled, using 387 arithmetics");
1361 ix86_fpmath = FPMATH_387;
1364 ix86_fpmath = FPMATH_SSE;
1366 else if (! strcmp (ix86_fpmath_string, "387,sse")
1367 || ! strcmp (ix86_fpmath_string, "sse,387"))
1371 warning ("SSE instruction set disabled, using 387 arithmetics");
1372 ix86_fpmath = FPMATH_387;
1374 else if (!TARGET_80387)
1376 warning ("387 instruction set disabled, using SSE arithmetics");
1377 ix86_fpmath = FPMATH_SSE;
1380 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1383 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1386 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1390 target_flags |= MASK_MMX;
1391 x86_prefetch_sse = true;
1394 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1397 target_flags |= MASK_MMX;
1398 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1399 extensions it adds. */
1400 if (x86_3dnow_a & (1 << ix86_arch))
1401 target_flags |= MASK_3DNOW_A;
1403 if ((x86_accumulate_outgoing_args & CPUMASK)
1404 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1406 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1408 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1411 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1412 p = strchr (internal_label_prefix, 'X');
1413 internal_label_prefix_len = p - internal_label_prefix;
1419 optimization_options (level, size)
1421 int size ATTRIBUTE_UNUSED;
1423 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1424 make the problem with not enough registers even worse. */
1425 #ifdef INSN_SCHEDULING
1427 flag_schedule_insns = 0;
1430 /* The default values of these switches depend on the TARGET_64BIT
1431 that is not known at this moment. Mark these values with 2 and
1432 let user the to override these. In case there is no command line option
1433 specifying them, we will set the defaults in override_options. */
1435 flag_omit_frame_pointer = 2;
1436 flag_pcc_struct_return = 2;
1437 flag_asynchronous_unwind_tables = 2;
1440 /* Table of valid machine attributes. */
1441 const struct attribute_spec ix86_attribute_table[] =
1443 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1444 /* Stdcall attribute says callee is responsible for popping arguments
1445 if they are not variable. */
1446 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1447 /* Fastcall attribute says callee is responsible for popping arguments
1448 if they are not variable. */
1449 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1450 /* Cdecl attribute says the callee is a normal C declaration */
1451 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1452 /* Regparm attribute specifies how many integer arguments are to be
1453 passed in registers. */
1454 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1455 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1456 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1457 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1458 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1460 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1461 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1462 { NULL, 0, 0, false, false, false, NULL }
1465 /* If PIC, we cannot make sibling calls to global functions
1466 because the PLT requires %ebx live.
1467 If we are returning floats on the register stack, we cannot make
1468 sibling calls to functions that return floats. (The stack adjust
1469 instruction will wind up after the sibcall jump, and not be executed.) */
1472 ix86_function_ok_for_sibcall (decl, exp)
1476 /* If we are generating position-independent code, we cannot sibcall
1477 optimize any indirect call, or a direct call to a global function,
1478 as the PLT requires %ebx be live. */
1479 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1482 /* If we are returning floats on the 80387 register stack, we cannot
1483 make a sibcall from a function that doesn't return a float to a
1484 function that does; the necessary stack adjustment will not be
1486 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1487 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1490 /* If this call is indirect, we'll need to be able to use a call-clobbered
1491 register for the address of the target function. Make sure that all
1492 such registers are not used for passing parameters. */
1493 if (!decl && !TARGET_64BIT)
1495 int regparm = ix86_regparm;
1498 /* We're looking at the CALL_EXPR, we need the type of the function. */
1499 type = TREE_OPERAND (exp, 0); /* pointer expression */
1500 type = TREE_TYPE (type); /* pointer type */
1501 type = TREE_TYPE (type); /* function type */
1503 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1505 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1509 /* ??? Need to count the actual number of registers to be used,
1510 not the possible number of registers. Fix later. */
1515 /* Otherwise okay. That also includes certain types of indirect calls. */
1519 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1520 arguments as in struct attribute_spec.handler. */
1522 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1525 tree args ATTRIBUTE_UNUSED;
1526 int flags ATTRIBUTE_UNUSED;
1529 if (TREE_CODE (*node) != FUNCTION_TYPE
1530 && TREE_CODE (*node) != METHOD_TYPE
1531 && TREE_CODE (*node) != FIELD_DECL
1532 && TREE_CODE (*node) != TYPE_DECL)
1534 warning ("`%s' attribute only applies to functions",
1535 IDENTIFIER_POINTER (name));
1536 *no_add_attrs = true;
1540 if (is_attribute_p ("fastcall", name))
1542 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1544 error ("fastcall and stdcall attributes are not compatible");
1546 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1548 error ("fastcall and regparm attributes are not compatible");
1551 else if (is_attribute_p ("stdcall", name))
1553 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1555 error ("fastcall and stdcall attributes are not compatible");
1562 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1563 *no_add_attrs = true;
1569 /* Handle a "regparm" attribute;
1570 arguments as in struct attribute_spec.handler. */
1572 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1576 int flags ATTRIBUTE_UNUSED;
1579 if (TREE_CODE (*node) != FUNCTION_TYPE
1580 && TREE_CODE (*node) != METHOD_TYPE
1581 && TREE_CODE (*node) != FIELD_DECL
1582 && TREE_CODE (*node) != TYPE_DECL)
1584 warning ("`%s' attribute only applies to functions",
1585 IDENTIFIER_POINTER (name));
1586 *no_add_attrs = true;
1592 cst = TREE_VALUE (args);
1593 if (TREE_CODE (cst) != INTEGER_CST)
1595 warning ("`%s' attribute requires an integer constant argument",
1596 IDENTIFIER_POINTER (name));
1597 *no_add_attrs = true;
1599 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1601 warning ("argument to `%s' attribute larger than %d",
1602 IDENTIFIER_POINTER (name), REGPARM_MAX);
1603 *no_add_attrs = true;
1606 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1608 error ("fastcall and regparm attributes are not compatible");
1615 /* Return 0 if the attributes for two types are incompatible, 1 if they
1616 are compatible, and 2 if they are nearly compatible (which causes a
1617 warning to be generated). */
1620 ix86_comp_type_attributes (type1, type2)
1624 /* Check for mismatch of non-default calling convention. */
1625 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1627 if (TREE_CODE (type1) != FUNCTION_TYPE)
1630 /* Check for mismatched fastcall types */
1631 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1632 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1635 /* Check for mismatched return types (cdecl vs stdcall). */
1636 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1637 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1642 /* Return the regparm value for a fuctio with the indicated TYPE. */
1645 ix86_fntype_regparm (type)
1650 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1652 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1654 return ix86_regparm;
1657 /* Value is the number of bytes of arguments automatically
1658 popped when returning from a subroutine call.
1659 FUNDECL is the declaration node of the function (as a tree),
1660 FUNTYPE is the data type of the function (as a tree),
1661 or for a library call it is an identifier node for the subroutine name.
1662 SIZE is the number of bytes of arguments passed on the stack.
1664 On the 80386, the RTD insn may be used to pop them if the number
1665 of args is fixed, but if the number is variable then the caller
1666 must pop them all. RTD can't be used for library calls now
1667 because the library is compiled with the Unix compiler.
1668 Use of RTD is a selectable option, since it is incompatible with
1669 standard Unix calling sequences. If the option is not selected,
1670 the caller must always pop the args.
1672 The attribute stdcall is equivalent to RTD on a per module basis. */
1675 ix86_return_pops_args (fundecl, funtype, size)
1680 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1682 /* Cdecl functions override -mrtd, and never pop the stack. */
1683 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1685 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1686 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1687 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1691 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1692 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1693 == void_type_node)))
1697 /* Lose any fake structure return argument if it is passed on the stack. */
1698 if (aggregate_value_p (TREE_TYPE (funtype))
1701 int nregs = ix86_fntype_regparm (funtype);
1704 return GET_MODE_SIZE (Pmode);
1710 /* Argument support functions. */
1712 /* Return true when register may be used to pass function parameters. */
1714 ix86_function_arg_regno_p (regno)
1719 return (regno < REGPARM_MAX
1720 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1721 if (SSE_REGNO_P (regno) && TARGET_SSE)
1723 /* RAX is used as hidden argument to va_arg functions. */
1726 for (i = 0; i < REGPARM_MAX; i++)
1727 if (regno == x86_64_int_parameter_registers[i])
1732 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1733 for a call to a function whose data type is FNTYPE.
1734 For a library call, FNTYPE is 0. */
1737 init_cumulative_args (cum, fntype, libname)
1738 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1739 tree fntype; /* tree ptr for function decl */
1740 rtx libname; /* SYMBOL_REF of library name or 0 */
1742 static CUMULATIVE_ARGS zero_cum;
1743 tree param, next_param;
1745 if (TARGET_DEBUG_ARG)
1747 fprintf (stderr, "\ninit_cumulative_args (");
1749 fprintf (stderr, "fntype code = %s, ret code = %s",
1750 tree_code_name[(int) TREE_CODE (fntype)],
1751 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1753 fprintf (stderr, "no fntype");
1756 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1761 /* Set up the number of registers to use for passing arguments. */
1762 cum->nregs = ix86_regparm;
1763 cum->sse_nregs = SSE_REGPARM_MAX;
1764 if (fntype && !TARGET_64BIT)
1766 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1769 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1771 cum->maybe_vaarg = false;
1773 /* Use ecx and edx registers if function has fastcall attribute */
1774 if (fntype && !TARGET_64BIT)
1776 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1784 /* Determine if this function has variable arguments. This is
1785 indicated by the last argument being 'void_type_mode' if there
1786 are no variable arguments. If there are variable arguments, then
1787 we won't pass anything in registers */
1791 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1792 param != 0; param = next_param)
1794 next_param = TREE_CHAIN (param);
1795 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1802 cum->maybe_vaarg = true;
1806 if ((!fntype && !libname)
1807 || (fntype && !TYPE_ARG_TYPES (fntype)))
1808 cum->maybe_vaarg = 1;
1810 if (TARGET_DEBUG_ARG)
1811 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1816 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1817 of this code is to classify each 8bytes of incoming argument by the register
1818 class and assign registers accordingly. */
1820 /* Return the union class of CLASS1 and CLASS2.
1821 See the x86-64 PS ABI for details. */
1823 static enum x86_64_reg_class
1824 merge_classes (class1, class2)
1825 enum x86_64_reg_class class1, class2;
1827 /* Rule #1: If both classes are equal, this is the resulting class. */
1828 if (class1 == class2)
1831 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1833 if (class1 == X86_64_NO_CLASS)
1835 if (class2 == X86_64_NO_CLASS)
1838 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1839 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1840 return X86_64_MEMORY_CLASS;
1842 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1843 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1844 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1845 return X86_64_INTEGERSI_CLASS;
1846 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1847 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1848 return X86_64_INTEGER_CLASS;
1850 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1851 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1852 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1853 return X86_64_MEMORY_CLASS;
1855 /* Rule #6: Otherwise class SSE is used. */
1856 return X86_64_SSE_CLASS;
1859 /* Classify the argument of type TYPE and mode MODE.
1860 CLASSES will be filled by the register class used to pass each word
1861 of the operand. The number of words is returned. In case the parameter
1862 should be passed in memory, 0 is returned. As a special case for zero
1863 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1865 BIT_OFFSET is used internally for handling records and specifies offset
1866 of the offset in bits modulo 256 to avoid overflow cases.
1868 See the x86-64 PS ABI for details.
1872 classify_argument (mode, type, classes, bit_offset)
1873 enum machine_mode mode;
1875 enum x86_64_reg_class classes[MAX_CLASSES];
1879 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1880 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1882 /* Variable sized entities are always passed/returned in memory. */
1886 if (type && AGGREGATE_TYPE_P (type))
1890 enum x86_64_reg_class subclasses[MAX_CLASSES];
1892 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1896 for (i = 0; i < words; i++)
1897 classes[i] = X86_64_NO_CLASS;
1899 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1900 signalize memory class, so handle it as special case. */
1903 classes[0] = X86_64_NO_CLASS;
1907 /* Classify each field of record and merge classes. */
1908 if (TREE_CODE (type) == RECORD_TYPE)
1910 /* For classes first merge in the field of the subclasses. */
1911 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1913 tree bases = TYPE_BINFO_BASETYPES (type);
1914 int n_bases = TREE_VEC_LENGTH (bases);
1917 for (i = 0; i < n_bases; ++i)
1919 tree binfo = TREE_VEC_ELT (bases, i);
1921 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1922 tree type = BINFO_TYPE (binfo);
1924 num = classify_argument (TYPE_MODE (type),
1926 (offset + bit_offset) % 256);
1929 for (i = 0; i < num; i++)
1931 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1933 merge_classes (subclasses[i], classes[i + pos]);
1937 /* And now merge the fields of structure. */
1938 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1940 if (TREE_CODE (field) == FIELD_DECL)
1944 /* Bitfields are always classified as integer. Handle them
1945 early, since later code would consider them to be
1946 misaligned integers. */
1947 if (DECL_BIT_FIELD (field))
1949 for (i = int_bit_position (field) / 8 / 8;
1950 i < (int_bit_position (field)
1951 + tree_low_cst (DECL_SIZE (field), 0)
1954 merge_classes (X86_64_INTEGER_CLASS,
1959 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1960 TREE_TYPE (field), subclasses,
1961 (int_bit_position (field)
1962 + bit_offset) % 256);
1965 for (i = 0; i < num; i++)
1968 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1970 merge_classes (subclasses[i], classes[i + pos]);
1976 /* Arrays are handled as small records. */
1977 else if (TREE_CODE (type) == ARRAY_TYPE)
1980 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1981 TREE_TYPE (type), subclasses, bit_offset);
1985 /* The partial classes are now full classes. */
1986 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1987 subclasses[0] = X86_64_SSE_CLASS;
1988 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1989 subclasses[0] = X86_64_INTEGER_CLASS;
1991 for (i = 0; i < words; i++)
1992 classes[i] = subclasses[i % num];
1994 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1995 else if (TREE_CODE (type) == UNION_TYPE
1996 || TREE_CODE (type) == QUAL_UNION_TYPE)
1998 /* For classes first merge in the field of the subclasses. */
1999 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2001 tree bases = TYPE_BINFO_BASETYPES (type);
2002 int n_bases = TREE_VEC_LENGTH (bases);
2005 for (i = 0; i < n_bases; ++i)
2007 tree binfo = TREE_VEC_ELT (bases, i);
2009 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2010 tree type = BINFO_TYPE (binfo);
2012 num = classify_argument (TYPE_MODE (type),
2014 (offset + (bit_offset % 64)) % 256);
2017 for (i = 0; i < num; i++)
2019 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2021 merge_classes (subclasses[i], classes[i + pos]);
2025 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2027 if (TREE_CODE (field) == FIELD_DECL)
2030 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2031 TREE_TYPE (field), subclasses,
2035 for (i = 0; i < num; i++)
2036 classes[i] = merge_classes (subclasses[i], classes[i]);
2043 /* Final merger cleanup. */
2044 for (i = 0; i < words; i++)
2046 /* If one class is MEMORY, everything should be passed in
2048 if (classes[i] == X86_64_MEMORY_CLASS)
2051 /* The X86_64_SSEUP_CLASS should be always preceded by
2052 X86_64_SSE_CLASS. */
2053 if (classes[i] == X86_64_SSEUP_CLASS
2054 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2055 classes[i] = X86_64_SSE_CLASS;
2057 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2058 if (classes[i] == X86_64_X87UP_CLASS
2059 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2060 classes[i] = X86_64_SSE_CLASS;
2065 /* Compute alignment needed. We align all types to natural boundaries with
2066 exception of XFmode that is aligned to 64bits. */
2067 if (mode != VOIDmode && mode != BLKmode)
2069 int mode_alignment = GET_MODE_BITSIZE (mode);
2072 mode_alignment = 128;
2073 else if (mode == XCmode)
2074 mode_alignment = 256;
2075 /* Misaligned fields are always returned in memory. */
2076 if (bit_offset % mode_alignment)
2080 /* Classification of atomic types. */
2090 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2091 classes[0] = X86_64_INTEGERSI_CLASS;
2093 classes[0] = X86_64_INTEGER_CLASS;
2097 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2100 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2101 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2104 if (!(bit_offset % 64))
2105 classes[0] = X86_64_SSESF_CLASS;
2107 classes[0] = X86_64_SSE_CLASS;
2110 classes[0] = X86_64_SSEDF_CLASS;
2113 classes[0] = X86_64_X87_CLASS;
2114 classes[1] = X86_64_X87UP_CLASS;
2117 classes[0] = X86_64_X87_CLASS;
2118 classes[1] = X86_64_X87UP_CLASS;
2119 classes[2] = X86_64_X87_CLASS;
2120 classes[3] = X86_64_X87UP_CLASS;
2123 classes[0] = X86_64_SSEDF_CLASS;
2124 classes[1] = X86_64_SSEDF_CLASS;
2127 classes[0] = X86_64_SSE_CLASS;
2135 classes[0] = X86_64_SSE_CLASS;
2136 classes[1] = X86_64_SSEUP_CLASS;
2151 /* Examine the argument and return set number of register required in each
2152 class. Return 0 iff parameter should be passed in memory. */
2154 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2155 enum machine_mode mode;
2157 int *int_nregs, *sse_nregs;
2160 enum x86_64_reg_class class[MAX_CLASSES];
2161 int n = classify_argument (mode, type, class, 0);
2167 for (n--; n >= 0; n--)
2170 case X86_64_INTEGER_CLASS:
2171 case X86_64_INTEGERSI_CLASS:
2174 case X86_64_SSE_CLASS:
2175 case X86_64_SSESF_CLASS:
2176 case X86_64_SSEDF_CLASS:
2179 case X86_64_NO_CLASS:
2180 case X86_64_SSEUP_CLASS:
2182 case X86_64_X87_CLASS:
2183 case X86_64_X87UP_CLASS:
2187 case X86_64_MEMORY_CLASS:
2192 /* Construct container for the argument used by GCC interface. See
2193 FUNCTION_ARG for the detailed description. */
2195 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2196 enum machine_mode mode;
2199 int nintregs, nsseregs;
2203 enum machine_mode tmpmode;
2205 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2206 enum x86_64_reg_class class[MAX_CLASSES];
2210 int needed_sseregs, needed_intregs;
2211 rtx exp[MAX_CLASSES];
2214 n = classify_argument (mode, type, class, 0);
2215 if (TARGET_DEBUG_ARG)
2218 fprintf (stderr, "Memory class\n");
2221 fprintf (stderr, "Classes:");
2222 for (i = 0; i < n; i++)
2224 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2226 fprintf (stderr, "\n");
2231 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2233 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2236 /* First construct simple cases. Avoid SCmode, since we want to use
2237 single register to pass this type. */
2238 if (n == 1 && mode != SCmode)
2241 case X86_64_INTEGER_CLASS:
2242 case X86_64_INTEGERSI_CLASS:
2243 return gen_rtx_REG (mode, intreg[0]);
2244 case X86_64_SSE_CLASS:
2245 case X86_64_SSESF_CLASS:
2246 case X86_64_SSEDF_CLASS:
2247 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2248 case X86_64_X87_CLASS:
2249 return gen_rtx_REG (mode, FIRST_STACK_REG);
2250 case X86_64_NO_CLASS:
2251 /* Zero sized array, struct or class. */
2256 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2257 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2259 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2260 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2261 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2262 && class[1] == X86_64_INTEGER_CLASS
2263 && (mode == CDImode || mode == TImode)
2264 && intreg[0] + 1 == intreg[1])
2265 return gen_rtx_REG (mode, intreg[0]);
2267 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2268 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2269 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2271 /* Otherwise figure out the entries of the PARALLEL. */
2272 for (i = 0; i < n; i++)
2276 case X86_64_NO_CLASS:
2278 case X86_64_INTEGER_CLASS:
2279 case X86_64_INTEGERSI_CLASS:
2280 /* Merge TImodes on aligned occasions here too. */
2281 if (i * 8 + 8 > bytes)
2282 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2283 else if (class[i] == X86_64_INTEGERSI_CLASS)
2287 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2288 if (tmpmode == BLKmode)
2290 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2291 gen_rtx_REG (tmpmode, *intreg),
2295 case X86_64_SSESF_CLASS:
2296 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2297 gen_rtx_REG (SFmode,
2298 SSE_REGNO (sse_regno)),
2302 case X86_64_SSEDF_CLASS:
2303 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2304 gen_rtx_REG (DFmode,
2305 SSE_REGNO (sse_regno)),
2309 case X86_64_SSE_CLASS:
2310 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2314 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2315 gen_rtx_REG (tmpmode,
2316 SSE_REGNO (sse_regno)),
2318 if (tmpmode == TImode)
2326 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2327 for (i = 0; i < nexps; i++)
2328 XVECEXP (ret, 0, i) = exp [i];
2332 /* Update the data in CUM to advance over an argument
2333 of mode MODE and data type TYPE.
2334 (TYPE is null for libcalls where that information may not be available.) */
2337 function_arg_advance (cum, mode, type, named)
2338 CUMULATIVE_ARGS *cum; /* current arg information */
2339 enum machine_mode mode; /* current arg mode */
2340 tree type; /* type of the argument or 0 if lib support */
2341 int named; /* whether or not the argument was named */
2344 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2345 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2347 if (TARGET_DEBUG_ARG)
2349 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2350 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2353 int int_nregs, sse_nregs;
2354 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2355 cum->words += words;
2356 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2358 cum->nregs -= int_nregs;
2359 cum->sse_nregs -= sse_nregs;
2360 cum->regno += int_nregs;
2361 cum->sse_regno += sse_nregs;
2364 cum->words += words;
2368 if (TARGET_SSE && mode == TImode)
2370 cum->sse_words += words;
2371 cum->sse_nregs -= 1;
2372 cum->sse_regno += 1;
2373 if (cum->sse_nregs <= 0)
2381 cum->words += words;
2382 cum->nregs -= words;
2383 cum->regno += words;
2385 if (cum->nregs <= 0)
2395 /* Define where to put the arguments to a function.
2396 Value is zero to push the argument on the stack,
2397 or a hard register in which to store the argument.
2399 MODE is the argument's machine mode.
2400 TYPE is the data type of the argument (as a tree).
2401 This is null for libcalls where that information may
2403 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2404 the preceding args and about the function being called.
2405 NAMED is nonzero if this argument is a named parameter
2406 (otherwise it is an extra parameter matching an ellipsis). */
2409 function_arg (cum, mode, type, named)
2410 CUMULATIVE_ARGS *cum; /* current arg information */
2411 enum machine_mode mode; /* current arg mode */
2412 tree type; /* type of the argument or 0 if lib support */
2413 int named; /* != 0 for normal args, == 0 for ... args */
2417 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2418 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2420 /* Handle a hidden AL argument containing number of registers for varargs
2421 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2423 if (mode == VOIDmode)
2426 return GEN_INT (cum->maybe_vaarg
2427 ? (cum->sse_nregs < 0
2435 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2436 &x86_64_int_parameter_registers [cum->regno],
2441 /* For now, pass fp/complex values on the stack. */
2450 if (words <= cum->nregs)
2452 int regno = cum->regno;
2454 /* Fastcall allocates the first two DWORD (SImode) or
2455 smaller arguments to ECX and EDX. */
2458 if (mode == BLKmode || mode == DImode)
2461 /* ECX not EAX is the first allocated register. */
2465 ret = gen_rtx_REG (mode, regno);
2470 ret = gen_rtx_REG (mode, cum->sse_regno);
2474 if (TARGET_DEBUG_ARG)
2477 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2478 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2481 print_simple_rtl (stderr, ret);
2483 fprintf (stderr, ", stack");
2485 fprintf (stderr, " )\n");
2491 /* A C expression that indicates when an argument must be passed by
2492 reference. If nonzero for an argument, a copy of that argument is
2493 made in memory and a pointer to the argument is passed instead of
2494 the argument itself. The pointer is passed in whatever way is
2495 appropriate for passing a pointer to that type. */
2498 function_arg_pass_by_reference (cum, mode, type, named)
2499 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2500 enum machine_mode mode ATTRIBUTE_UNUSED;
2502 int named ATTRIBUTE_UNUSED;
2507 if (type && int_size_in_bytes (type) == -1)
2509 if (TARGET_DEBUG_ARG)
2510 fprintf (stderr, "function_arg_pass_by_reference\n");
2517 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2521 ix86_function_arg_boundary (mode, type)
2522 enum machine_mode mode;
2527 return PARM_BOUNDARY;
2529 align = TYPE_ALIGN (type);
2531 align = GET_MODE_ALIGNMENT (mode);
2532 if (align < PARM_BOUNDARY)
2533 align = PARM_BOUNDARY;
2539 /* Return true if N is a possible register number of function value. */
2541 ix86_function_value_regno_p (regno)
2546 return ((regno) == 0
2547 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2548 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2550 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2551 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2552 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2555 /* Define how to find the value returned by a function.
2556 VALTYPE is the data type of the value (as a tree).
2557 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2558 otherwise, FUNC is 0. */
2560 ix86_function_value (valtype)
2565 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2566 REGPARM_MAX, SSE_REGPARM_MAX,
2567 x86_64_int_return_registers, 0);
2568 /* For zero sized structures, construct_container return NULL, but we need
2569 to keep rest of compiler happy by returning meaningful value. */
2571 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2575 return gen_rtx_REG (TYPE_MODE (valtype),
2576 ix86_value_regno (TYPE_MODE (valtype)));
2579 /* Return false iff type is returned in memory. */
2581 ix86_return_in_memory (type)
2584 int needed_intregs, needed_sseregs;
2587 return !examine_argument (TYPE_MODE (type), type, 1,
2588 &needed_intregs, &needed_sseregs);
2592 if (TYPE_MODE (type) == BLKmode
2593 || (VECTOR_MODE_P (TYPE_MODE (type))
2594 && int_size_in_bytes (type) == 8)
2595 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2596 && TYPE_MODE (type) != TFmode
2597 && !VECTOR_MODE_P (TYPE_MODE (type))))
2603 /* Define how to find the value returned by a library function
2604 assuming the value has mode MODE. */
2606 ix86_libcall_value (mode)
2607 enum machine_mode mode;
2617 return gen_rtx_REG (mode, FIRST_SSE_REG);
2620 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2622 return gen_rtx_REG (mode, 0);
2626 return gen_rtx_REG (mode, ix86_value_regno (mode));
2629 /* Given a mode, return the register to use for a return value. */
2632 ix86_value_regno (mode)
2633 enum machine_mode mode;
2635 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2636 return FIRST_FLOAT_REG;
2637 if (mode == TImode || VECTOR_MODE_P (mode))
2638 return FIRST_SSE_REG;
2642 /* Create the va_list data type. */
2645 ix86_build_va_list ()
2647 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2649 /* For i386 we use plain pointer to argument area. */
2651 return build_pointer_type (char_type_node);
2653 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2654 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2656 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2657 unsigned_type_node);
2658 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2659 unsigned_type_node);
2660 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2662 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2665 DECL_FIELD_CONTEXT (f_gpr) = record;
2666 DECL_FIELD_CONTEXT (f_fpr) = record;
2667 DECL_FIELD_CONTEXT (f_ovf) = record;
2668 DECL_FIELD_CONTEXT (f_sav) = record;
2670 TREE_CHAIN (record) = type_decl;
2671 TYPE_NAME (record) = type_decl;
2672 TYPE_FIELDS (record) = f_gpr;
2673 TREE_CHAIN (f_gpr) = f_fpr;
2674 TREE_CHAIN (f_fpr) = f_ovf;
2675 TREE_CHAIN (f_ovf) = f_sav;
2677 layout_type (record);
2679 /* The correct type is an array type of one element. */
2680 return build_array_type (record, build_index_type (size_zero_node));
2683 /* Perform any needed actions needed for a function that is receiving a
2684 variable number of arguments.
2688 MODE and TYPE are the mode and type of the current parameter.
2690 PRETEND_SIZE is a variable that should be set to the amount of stack
2691 that must be pushed by the prolog to pretend that our caller pushed
2694 Normally, this macro will push all remaining incoming registers on the
2695 stack and set PRETEND_SIZE to the length of the registers pushed. */
2698 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2699 CUMULATIVE_ARGS *cum;
2700 enum machine_mode mode;
2702 int *pretend_size ATTRIBUTE_UNUSED;
2706 CUMULATIVE_ARGS next_cum;
2707 rtx save_area = NULL_RTX, mem;
2720 /* Indicate to allocate space on the stack for varargs save area. */
2721 ix86_save_varrargs_registers = 1;
2723 fntype = TREE_TYPE (current_function_decl);
2724 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2725 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2726 != void_type_node));
2728 /* For varargs, we do not want to skip the dummy va_dcl argument.
2729 For stdargs, we do want to skip the last named argument. */
2732 function_arg_advance (&next_cum, mode, type, 1);
2735 save_area = frame_pointer_rtx;
2737 set = get_varargs_alias_set ();
2739 for (i = next_cum.regno; i < ix86_regparm; i++)
2741 mem = gen_rtx_MEM (Pmode,
2742 plus_constant (save_area, i * UNITS_PER_WORD));
2743 set_mem_alias_set (mem, set);
2744 emit_move_insn (mem, gen_rtx_REG (Pmode,
2745 x86_64_int_parameter_registers[i]));
2748 if (next_cum.sse_nregs)
2750 /* Now emit code to save SSE registers. The AX parameter contains number
2751 of SSE parameter registers used to call this function. We use
2752 sse_prologue_save insn template that produces computed jump across
2753 SSE saves. We need some preparation work to get this working. */
2755 label = gen_label_rtx ();
2756 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2758 /* Compute address to jump to :
2759 label - 5*eax + nnamed_sse_arguments*5 */
2760 tmp_reg = gen_reg_rtx (Pmode);
2761 nsse_reg = gen_reg_rtx (Pmode);
2762 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2763 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2764 gen_rtx_MULT (Pmode, nsse_reg,
2766 if (next_cum.sse_regno)
2769 gen_rtx_CONST (DImode,
2770 gen_rtx_PLUS (DImode,
2772 GEN_INT (next_cum.sse_regno * 4))));
2774 emit_move_insn (nsse_reg, label_ref);
2775 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2777 /* Compute address of memory block we save into. We always use pointer
2778 pointing 127 bytes after first byte to store - this is needed to keep
2779 instruction size limited by 4 bytes. */
2780 tmp_reg = gen_reg_rtx (Pmode);
2781 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2782 plus_constant (save_area,
2783 8 * REGPARM_MAX + 127)));
2784 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2785 set_mem_alias_set (mem, set);
2786 set_mem_align (mem, BITS_PER_WORD);
2788 /* And finally do the dirty job! */
2789 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2790 GEN_INT (next_cum.sse_regno), label));
2795 /* Implement va_start. */
2798 ix86_va_start (valist, nextarg)
2802 HOST_WIDE_INT words, n_gpr, n_fpr;
2803 tree f_gpr, f_fpr, f_ovf, f_sav;
2804 tree gpr, fpr, ovf, sav, t;
2806 /* Only 64bit target needs something special. */
2809 std_expand_builtin_va_start (valist, nextarg);
2813 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2814 f_fpr = TREE_CHAIN (f_gpr);
2815 f_ovf = TREE_CHAIN (f_fpr);
2816 f_sav = TREE_CHAIN (f_ovf);
2818 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2819 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2820 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2821 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2822 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2824 /* Count number of gp and fp argument registers used. */
2825 words = current_function_args_info.words;
2826 n_gpr = current_function_args_info.regno;
2827 n_fpr = current_function_args_info.sse_regno;
2829 if (TARGET_DEBUG_ARG)
2830 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2831 (int) words, (int) n_gpr, (int) n_fpr);
2833 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2834 build_int_2 (n_gpr * 8, 0));
2835 TREE_SIDE_EFFECTS (t) = 1;
2836 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2838 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2839 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2840 TREE_SIDE_EFFECTS (t) = 1;
2841 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2843 /* Find the overflow area. */
2844 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2846 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2847 build_int_2 (words * UNITS_PER_WORD, 0));
2848 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2849 TREE_SIDE_EFFECTS (t) = 1;
2850 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2852 /* Find the register save area.
2853 Prologue of the function save it right above stack frame. */
2854 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2855 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2856 TREE_SIDE_EFFECTS (t) = 1;
2857 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2860 /* Implement va_arg. */
2862 ix86_va_arg (valist, type)
2865 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2866 tree f_gpr, f_fpr, f_ovf, f_sav;
2867 tree gpr, fpr, ovf, sav, t;
2869 rtx lab_false, lab_over = NULL_RTX;
2874 /* Only 64bit target needs something special. */
2877 return std_expand_builtin_va_arg (valist, type);
2880 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2881 f_fpr = TREE_CHAIN (f_gpr);
2882 f_ovf = TREE_CHAIN (f_fpr);
2883 f_sav = TREE_CHAIN (f_ovf);
2885 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2886 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2887 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2888 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2889 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2891 size = int_size_in_bytes (type);
2894 /* Passed by reference. */
2896 type = build_pointer_type (type);
2897 size = int_size_in_bytes (type);
2899 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2901 container = construct_container (TYPE_MODE (type), type, 0,
2902 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2904 * Pull the value out of the saved registers ...
2907 addr_rtx = gen_reg_rtx (Pmode);
2911 rtx int_addr_rtx, sse_addr_rtx;
2912 int needed_intregs, needed_sseregs;
2915 lab_over = gen_label_rtx ();
2916 lab_false = gen_label_rtx ();
2918 examine_argument (TYPE_MODE (type), type, 0,
2919 &needed_intregs, &needed_sseregs);
2922 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2923 || TYPE_ALIGN (type) > 128);
2925 /* In case we are passing structure, verify that it is consecutive block
2926 on the register save area. If not we need to do moves. */
2927 if (!need_temp && !REG_P (container))
2929 /* Verify that all registers are strictly consecutive */
2930 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2934 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2936 rtx slot = XVECEXP (container, 0, i);
2937 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2938 || INTVAL (XEXP (slot, 1)) != i * 16)
2946 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2948 rtx slot = XVECEXP (container, 0, i);
2949 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2950 || INTVAL (XEXP (slot, 1)) != i * 8)
2957 int_addr_rtx = addr_rtx;
2958 sse_addr_rtx = addr_rtx;
2962 int_addr_rtx = gen_reg_rtx (Pmode);
2963 sse_addr_rtx = gen_reg_rtx (Pmode);
2965 /* First ensure that we fit completely in registers. */
2968 emit_cmp_and_jump_insns (expand_expr
2969 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2970 GEN_INT ((REGPARM_MAX - needed_intregs +
2971 1) * 8), GE, const1_rtx, SImode,
2976 emit_cmp_and_jump_insns (expand_expr
2977 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2978 GEN_INT ((SSE_REGPARM_MAX -
2979 needed_sseregs + 1) * 16 +
2980 REGPARM_MAX * 8), GE, const1_rtx,
2981 SImode, 1, lab_false);
2984 /* Compute index to start of area used for integer regs. */
2987 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2988 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2989 if (r != int_addr_rtx)
2990 emit_move_insn (int_addr_rtx, r);
2994 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2995 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2996 if (r != sse_addr_rtx)
2997 emit_move_insn (sse_addr_rtx, r);
3004 /* Never use the memory itself, as it has the alias set. */
3005 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3006 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3007 set_mem_alias_set (mem, get_varargs_alias_set ());
3008 set_mem_align (mem, BITS_PER_UNIT);
3010 for (i = 0; i < XVECLEN (container, 0); i++)
3012 rtx slot = XVECEXP (container, 0, i);
3013 rtx reg = XEXP (slot, 0);
3014 enum machine_mode mode = GET_MODE (reg);
3020 if (SSE_REGNO_P (REGNO (reg)))
3022 src_addr = sse_addr_rtx;
3023 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3027 src_addr = int_addr_rtx;
3028 src_offset = REGNO (reg) * 8;
3030 src_mem = gen_rtx_MEM (mode, src_addr);
3031 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3032 src_mem = adjust_address (src_mem, mode, src_offset);
3033 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3034 emit_move_insn (dest_mem, src_mem);
3041 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3042 build_int_2 (needed_intregs * 8, 0));
3043 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3044 TREE_SIDE_EFFECTS (t) = 1;
3045 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3050 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3051 build_int_2 (needed_sseregs * 16, 0));
3052 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3053 TREE_SIDE_EFFECTS (t) = 1;
3054 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3057 emit_jump_insn (gen_jump (lab_over));
3059 emit_label (lab_false);
3062 /* ... otherwise out of the overflow area. */
3064 /* Care for on-stack alignment if needed. */
3065 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3069 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3070 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3071 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3075 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3077 emit_move_insn (addr_rtx, r);
3080 build (PLUS_EXPR, TREE_TYPE (t), t,
3081 build_int_2 (rsize * UNITS_PER_WORD, 0));
3082 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3083 TREE_SIDE_EFFECTS (t) = 1;
3084 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3087 emit_label (lab_over);
3091 r = gen_rtx_MEM (Pmode, addr_rtx);
3092 set_mem_alias_set (r, get_varargs_alias_set ());
3093 emit_move_insn (addr_rtx, r);
3099 /* Return nonzero if OP is either a i387 or SSE fp register. */
3101 any_fp_register_operand (op, mode)
3103 enum machine_mode mode ATTRIBUTE_UNUSED;
3105 return ANY_FP_REG_P (op);
3108 /* Return nonzero if OP is an i387 fp register. */
3110 fp_register_operand (op, mode)
3112 enum machine_mode mode ATTRIBUTE_UNUSED;
3114 return FP_REG_P (op);
3117 /* Return nonzero if OP is a non-fp register_operand. */
3119 register_and_not_any_fp_reg_operand (op, mode)
3121 enum machine_mode mode;
3123 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3126 /* Return nonzero if OP is a register operand other than an
3127 i387 fp register. */
3129 register_and_not_fp_reg_operand (op, mode)
3131 enum machine_mode mode;
3133 return register_operand (op, mode) && !FP_REG_P (op);
3136 /* Return nonzero if OP is general operand representable on x86_64. */
3139 x86_64_general_operand (op, mode)
3141 enum machine_mode mode;
3144 return general_operand (op, mode);
3145 if (nonimmediate_operand (op, mode))
3147 return x86_64_sign_extended_value (op);
3150 /* Return nonzero if OP is general operand representable on x86_64
3151 as either sign extended or zero extended constant. */
3154 x86_64_szext_general_operand (op, mode)
3156 enum machine_mode mode;
3159 return general_operand (op, mode);
3160 if (nonimmediate_operand (op, mode))
3162 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3165 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3168 x86_64_nonmemory_operand (op, mode)
3170 enum machine_mode mode;
3173 return nonmemory_operand (op, mode);
3174 if (register_operand (op, mode))
3176 return x86_64_sign_extended_value (op);
3179 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3182 x86_64_movabs_operand (op, mode)
3184 enum machine_mode mode;
3186 if (!TARGET_64BIT || !flag_pic)
3187 return nonmemory_operand (op, mode);
3188 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3190 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3195 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3198 x86_64_szext_nonmemory_operand (op, mode)
3200 enum machine_mode mode;
3203 return nonmemory_operand (op, mode);
3204 if (register_operand (op, mode))
3206 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3209 /* Return nonzero if OP is immediate operand representable on x86_64. */
3212 x86_64_immediate_operand (op, mode)
3214 enum machine_mode mode;
3217 return immediate_operand (op, mode);
3218 return x86_64_sign_extended_value (op);
3221 /* Return nonzero if OP is immediate operand representable on x86_64. */
3224 x86_64_zext_immediate_operand (op, mode)
3226 enum machine_mode mode ATTRIBUTE_UNUSED;
3228 return x86_64_zero_extended_value (op);
3231 /* Return nonzero if OP is (const_int 1), else return zero. */
3234 const_int_1_operand (op, mode)
3236 enum machine_mode mode ATTRIBUTE_UNUSED;
3238 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3241 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3242 for shift & compare patterns, as shifting by 0 does not change flags),
3243 else return zero. */
3246 const_int_1_31_operand (op, mode)
3248 enum machine_mode mode ATTRIBUTE_UNUSED;
3250 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3253 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3254 reference and a constant. */
3257 symbolic_operand (op, mode)
3259 enum machine_mode mode ATTRIBUTE_UNUSED;
3261 switch (GET_CODE (op))
3269 if (GET_CODE (op) == SYMBOL_REF
3270 || GET_CODE (op) == LABEL_REF
3271 || (GET_CODE (op) == UNSPEC
3272 && (XINT (op, 1) == UNSPEC_GOT
3273 || XINT (op, 1) == UNSPEC_GOTOFF
3274 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3276 if (GET_CODE (op) != PLUS
3277 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3281 if (GET_CODE (op) == SYMBOL_REF
3282 || GET_CODE (op) == LABEL_REF)
3284 /* Only @GOTOFF gets offsets. */
3285 if (GET_CODE (op) != UNSPEC
3286 || XINT (op, 1) != UNSPEC_GOTOFF)
3289 op = XVECEXP (op, 0, 0);
3290 if (GET_CODE (op) == SYMBOL_REF
3291 || GET_CODE (op) == LABEL_REF)
3300 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3303 pic_symbolic_operand (op, mode)
3305 enum machine_mode mode ATTRIBUTE_UNUSED;
3307 if (GET_CODE (op) != CONST)
3312 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3317 if (GET_CODE (op) == UNSPEC)
3319 if (GET_CODE (op) != PLUS
3320 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3323 if (GET_CODE (op) == UNSPEC)
3329 /* Return true if OP is a symbolic operand that resolves locally. */
3332 local_symbolic_operand (op, mode)
3334 enum machine_mode mode ATTRIBUTE_UNUSED;
3336 if (GET_CODE (op) == CONST
3337 && GET_CODE (XEXP (op, 0)) == PLUS
3338 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3339 op = XEXP (XEXP (op, 0), 0);
3341 if (GET_CODE (op) == LABEL_REF)
3344 if (GET_CODE (op) != SYMBOL_REF)
3347 /* These we've been told are local by varasm and encode_section_info
3349 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3352 /* There is, however, a not insubstantial body of code in the rest of
3353 the compiler that assumes it can just stick the results of
3354 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3355 /* ??? This is a hack. Should update the body of the compiler to
3356 always create a DECL an invoke targetm.encode_section_info. */
3357 if (strncmp (XSTR (op, 0), internal_label_prefix,
3358 internal_label_prefix_len) == 0)
3364 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3367 tls_symbolic_operand (op, mode)
3369 enum machine_mode mode ATTRIBUTE_UNUSED;
3371 const char *symbol_str;
3373 if (GET_CODE (op) != SYMBOL_REF)
3375 symbol_str = XSTR (op, 0);
3377 if (symbol_str[0] != '%')
3379 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3383 tls_symbolic_operand_1 (op, kind)
3385 enum tls_model kind;
3387 const char *symbol_str;
3389 if (GET_CODE (op) != SYMBOL_REF)
3391 symbol_str = XSTR (op, 0);
3393 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3397 global_dynamic_symbolic_operand (op, mode)
3399 enum machine_mode mode ATTRIBUTE_UNUSED;
3401 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3405 local_dynamic_symbolic_operand (op, mode)
3407 enum machine_mode mode ATTRIBUTE_UNUSED;
3409 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3413 initial_exec_symbolic_operand (op, mode)
3415 enum machine_mode mode ATTRIBUTE_UNUSED;
3417 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3421 local_exec_symbolic_operand (op, mode)
3423 enum machine_mode mode ATTRIBUTE_UNUSED;
3425 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3428 /* Test for a valid operand for a call instruction. Don't allow the
3429 arg pointer register or virtual regs since they may decay into
3430 reg + const, which the patterns can't handle. */
3433 call_insn_operand (op, mode)
3435 enum machine_mode mode ATTRIBUTE_UNUSED;
3437 /* Disallow indirect through a virtual register. This leads to
3438 compiler aborts when trying to eliminate them. */
3439 if (GET_CODE (op) == REG
3440 && (op == arg_pointer_rtx
3441 || op == frame_pointer_rtx
3442 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3443 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3446 /* Disallow `call 1234'. Due to varying assembler lameness this
3447 gets either rejected or translated to `call .+1234'. */
3448 if (GET_CODE (op) == CONST_INT)
3451 /* Explicitly allow SYMBOL_REF even if pic. */
3452 if (GET_CODE (op) == SYMBOL_REF)
3455 /* Otherwise we can allow any general_operand in the address. */
3456 return general_operand (op, Pmode);
3459 /* Test for a valid operand for a call instruction. Don't allow the
3460 arg pointer register or virtual regs since they may decay into
3461 reg + const, which the patterns can't handle. */
3464 sibcall_insn_operand (op, mode)
3466 enum machine_mode mode ATTRIBUTE_UNUSED;
3468 /* Disallow indirect through a virtual register. This leads to
3469 compiler aborts when trying to eliminate them. */
3470 if (GET_CODE (op) == REG
3471 && (op == arg_pointer_rtx
3472 || op == frame_pointer_rtx
3473 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3474 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3477 /* Explicitly allow SYMBOL_REF even if pic. */
3478 if (GET_CODE (op) == SYMBOL_REF)
3481 /* Otherwise we can only allow register operands. */
3482 return register_operand (op, Pmode);
3486 constant_call_address_operand (op, mode)
3488 enum machine_mode mode ATTRIBUTE_UNUSED;
3490 if (GET_CODE (op) == CONST
3491 && GET_CODE (XEXP (op, 0)) == PLUS
3492 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3493 op = XEXP (XEXP (op, 0), 0);
3494 return GET_CODE (op) == SYMBOL_REF;
3497 /* Match exactly zero and one. */
3500 const0_operand (op, mode)
3502 enum machine_mode mode;
3504 return op == CONST0_RTX (mode);
3508 const1_operand (op, mode)
3510 enum machine_mode mode ATTRIBUTE_UNUSED;
3512 return op == const1_rtx;
3515 /* Match 2, 4, or 8. Used for leal multiplicands. */
3518 const248_operand (op, mode)
3520 enum machine_mode mode ATTRIBUTE_UNUSED;
3522 return (GET_CODE (op) == CONST_INT
3523 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3526 /* True if this is a constant appropriate for an increment or decrement. */
3529 incdec_operand (op, mode)
3531 enum machine_mode mode ATTRIBUTE_UNUSED;
3533 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3534 registers, since carry flag is not set. */
3535 if (TARGET_PENTIUM4 && !optimize_size)
3537 return op == const1_rtx || op == constm1_rtx;
3540 /* Return nonzero if OP is acceptable as operand of DImode shift
3544 shiftdi_operand (op, mode)
3546 enum machine_mode mode ATTRIBUTE_UNUSED;
3549 return nonimmediate_operand (op, mode);
3551 return register_operand (op, mode);
3554 /* Return false if this is the stack pointer, or any other fake
3555 register eliminable to the stack pointer. Otherwise, this is
3558 This is used to prevent esp from being used as an index reg.
3559 Which would only happen in pathological cases. */
3562 reg_no_sp_operand (op, mode)
3564 enum machine_mode mode;
3567 if (GET_CODE (t) == SUBREG)
3569 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3572 return register_operand (op, mode);
3576 mmx_reg_operand (op, mode)
3578 enum machine_mode mode ATTRIBUTE_UNUSED;
3580 return MMX_REG_P (op);
3583 /* Return false if this is any eliminable register. Otherwise
3587 general_no_elim_operand (op, mode)
3589 enum machine_mode mode;
3592 if (GET_CODE (t) == SUBREG)
3594 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3595 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3596 || t == virtual_stack_dynamic_rtx)
3599 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3600 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3603 return general_operand (op, mode);
3606 /* Return false if this is any eliminable register. Otherwise
3607 register_operand or const_int. */
3610 nonmemory_no_elim_operand (op, mode)
3612 enum machine_mode mode;
3615 if (GET_CODE (t) == SUBREG)
3617 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3618 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3619 || t == virtual_stack_dynamic_rtx)
3622 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3625 /* Return false if this is any eliminable register or stack register,
3626 otherwise work like register_operand. */
3629 index_register_operand (op, mode)
3631 enum machine_mode mode;
3634 if (GET_CODE (t) == SUBREG)
3638 if (t == arg_pointer_rtx
3639 || t == frame_pointer_rtx
3640 || t == virtual_incoming_args_rtx
3641 || t == virtual_stack_vars_rtx
3642 || t == virtual_stack_dynamic_rtx
3643 || REGNO (t) == STACK_POINTER_REGNUM)
3646 return general_operand (op, mode);
3649 /* Return true if op is a Q_REGS class register. */
3652 q_regs_operand (op, mode)
3654 enum machine_mode mode;
3656 if (mode != VOIDmode && GET_MODE (op) != mode)
3658 if (GET_CODE (op) == SUBREG)
3659 op = SUBREG_REG (op);
3660 return ANY_QI_REG_P (op);
3663 /* Return true if op is an flags register. */
3666 flags_reg_operand (op, mode)
3668 enum machine_mode mode;
3670 if (mode != VOIDmode && GET_MODE (op) != mode)
3672 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3675 /* Return true if op is a NON_Q_REGS class register. */
3678 non_q_regs_operand (op, mode)
3680 enum machine_mode mode;
3682 if (mode != VOIDmode && GET_MODE (op) != mode)
3684 if (GET_CODE (op) == SUBREG)
3685 op = SUBREG_REG (op);
3686 return NON_QI_REG_P (op);
3690 zero_extended_scalar_load_operand (op, mode)
3692 enum machine_mode mode ATTRIBUTE_UNUSED;
3695 if (GET_CODE (op) != MEM)
3697 op = maybe_get_pool_constant (op);
3700 if (GET_CODE (op) != CONST_VECTOR)
3703 (GET_MODE_SIZE (GET_MODE (op)) /
3704 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3705 for (n_elts--; n_elts > 0; n_elts--)
3707 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3708 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3714 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3717 sse_comparison_operator (op, mode)
3719 enum machine_mode mode ATTRIBUTE_UNUSED;
3721 enum rtx_code code = GET_CODE (op);
3724 /* Operations supported directly. */
3734 /* These are equivalent to ones above in non-IEEE comparisons. */
3741 return !TARGET_IEEE_FP;
3746 /* Return 1 if OP is a valid comparison operator in valid mode. */
3748 ix86_comparison_operator (op, mode)
3750 enum machine_mode mode;
3752 enum machine_mode inmode;
3753 enum rtx_code code = GET_CODE (op);
3754 if (mode != VOIDmode && GET_MODE (op) != mode)
3756 if (GET_RTX_CLASS (code) != '<')
3758 inmode = GET_MODE (XEXP (op, 0));
3760 if (inmode == CCFPmode || inmode == CCFPUmode)
3762 enum rtx_code second_code, bypass_code;
3763 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3764 return (bypass_code == NIL && second_code == NIL);
3771 if (inmode == CCmode || inmode == CCGCmode
3772 || inmode == CCGOCmode || inmode == CCNOmode)
3775 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3776 if (inmode == CCmode)
3780 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3788 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3791 fcmov_comparison_operator (op, mode)
3793 enum machine_mode mode;
3795 enum machine_mode inmode;
3796 enum rtx_code code = GET_CODE (op);
3797 if (mode != VOIDmode && GET_MODE (op) != mode)
3799 if (GET_RTX_CLASS (code) != '<')
3801 inmode = GET_MODE (XEXP (op, 0));
3802 if (inmode == CCFPmode || inmode == CCFPUmode)
3804 enum rtx_code second_code, bypass_code;
3805 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3806 if (bypass_code != NIL || second_code != NIL)
3808 code = ix86_fp_compare_code_to_integer (code);
3810 /* i387 supports just limited amount of conditional codes. */
3813 case LTU: case GTU: case LEU: case GEU:
3814 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3817 case ORDERED: case UNORDERED:
3825 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3828 promotable_binary_operator (op, mode)
3830 enum machine_mode mode ATTRIBUTE_UNUSED;
3832 switch (GET_CODE (op))
3835 /* Modern CPUs have same latency for HImode and SImode multiply,
3836 but 386 and 486 do HImode multiply faster. */
3837 return ix86_cpu > PROCESSOR_I486;
3849 /* Nearly general operand, but accept any const_double, since we wish
3850 to be able to drop them into memory rather than have them get pulled
3854 cmp_fp_expander_operand (op, mode)
3856 enum machine_mode mode;
3858 if (mode != VOIDmode && mode != GET_MODE (op))
3860 if (GET_CODE (op) == CONST_DOUBLE)
3862 return general_operand (op, mode);
3865 /* Match an SI or HImode register for a zero_extract. */
3868 ext_register_operand (op, mode)
3870 enum machine_mode mode ATTRIBUTE_UNUSED;
3873 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3874 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3877 if (!register_operand (op, VOIDmode))
3880 /* Be careful to accept only registers having upper parts. */
3881 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3882 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3885 /* Return 1 if this is a valid binary floating-point operation.
3886 OP is the expression matched, and MODE is its mode. */
3889 binary_fp_operator (op, mode)
3891 enum machine_mode mode;
3893 if (mode != VOIDmode && mode != GET_MODE (op))
3896 switch (GET_CODE (op))
3902 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3910 mult_operator (op, mode)
3912 enum machine_mode mode ATTRIBUTE_UNUSED;
3914 return GET_CODE (op) == MULT;
3918 div_operator (op, mode)
3920 enum machine_mode mode ATTRIBUTE_UNUSED;
3922 return GET_CODE (op) == DIV;
3926 arith_or_logical_operator (op, mode)
3928 enum machine_mode mode;
3930 return ((mode == VOIDmode || GET_MODE (op) == mode)
3931 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3932 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3935 /* Returns 1 if OP is memory operand with a displacement. */
3938 memory_displacement_operand (op, mode)
3940 enum machine_mode mode;
3942 struct ix86_address parts;
3944 if (! memory_operand (op, mode))
3947 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3950 return parts.disp != NULL_RTX;
3953 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3954 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3956 ??? It seems likely that this will only work because cmpsi is an
3957 expander, and no actual insns use this. */
3960 cmpsi_operand (op, mode)
3962 enum machine_mode mode;
3964 if (nonimmediate_operand (op, mode))
3967 if (GET_CODE (op) == AND
3968 && GET_MODE (op) == SImode
3969 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3970 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3971 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3972 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3973 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3974 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3980 /* Returns 1 if OP is memory operand that can not be represented by the
3984 long_memory_operand (op, mode)
3986 enum machine_mode mode;
3988 if (! memory_operand (op, mode))
3991 return memory_address_length (op) != 0;
3994 /* Return nonzero if the rtx is known aligned. */
3997 aligned_operand (op, mode)
3999 enum machine_mode mode;
4001 struct ix86_address parts;
4003 if (!general_operand (op, mode))
4006 /* Registers and immediate operands are always "aligned". */
4007 if (GET_CODE (op) != MEM)
4010 /* Don't even try to do any aligned optimizations with volatiles. */
4011 if (MEM_VOLATILE_P (op))
4016 /* Pushes and pops are only valid on the stack pointer. */
4017 if (GET_CODE (op) == PRE_DEC
4018 || GET_CODE (op) == POST_INC)
4021 /* Decode the address. */
4022 if (! ix86_decompose_address (op, &parts))
4025 if (parts.base && GET_CODE (parts.base) == SUBREG)
4026 parts.base = SUBREG_REG (parts.base);
4027 if (parts.index && GET_CODE (parts.index) == SUBREG)
4028 parts.index = SUBREG_REG (parts.index);
4030 /* Look for some component that isn't known to be aligned. */
4034 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4039 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4044 if (GET_CODE (parts.disp) != CONST_INT
4045 || (INTVAL (parts.disp) & 3) != 0)
4049 /* Didn't find one -- this must be an aligned address. */
4053 /* Return true if the constant is something that can be loaded with
4054 a special instruction. Only handle 0.0 and 1.0; others are less
4058 standard_80387_constant_p (x)
4061 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4063 /* Note that on the 80387, other constants, such as pi, that we should support
4064 too. On some machines, these are much slower to load as standard constant,
4065 than to load from doubles in memory. */
4066 if (x == CONST0_RTX (GET_MODE (x)))
4068 if (x == CONST1_RTX (GET_MODE (x)))
4073 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4076 standard_sse_constant_p (x)
4079 if (x == const0_rtx)
4081 return (x == CONST0_RTX (GET_MODE (x)));
4084 /* Returns 1 if OP contains a symbol reference */
4087 symbolic_reference_mentioned_p (op)
4090 register const char *fmt;
4093 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4096 fmt = GET_RTX_FORMAT (GET_CODE (op));
4097 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4103 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4104 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4108 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4115 /* Return 1 if it is appropriate to emit `ret' instructions in the
4116 body of a function. Do this only if the epilogue is simple, needing a
4117 couple of insns. Prior to reloading, we can't tell how many registers
4118 must be saved, so return 0 then. Return 0 if there is no frame
4119 marker to de-allocate.
4121 If NON_SAVING_SETJMP is defined and true, then it is not possible
4122 for the epilogue to be simple, so return 0. This is a special case
4123 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4124 until final, but jump_optimize may need to know sooner if a
4128 ix86_can_use_return_insn_p ()
4130 struct ix86_frame frame;
4132 #ifdef NON_SAVING_SETJMP
4133 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4137 if (! reload_completed || frame_pointer_needed)
4140 /* Don't allow more than 32 pop, since that's all we can do
4141 with one instruction. */
4142 if (current_function_pops_args
4143 && current_function_args_size >= 32768)
4146 ix86_compute_frame_layout (&frame);
4147 return frame.to_allocate == 0 && frame.nregs == 0;
4150 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4152 x86_64_sign_extended_value (value)
4155 switch (GET_CODE (value))
4157 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4158 to be at least 32 and this all acceptable constants are
4159 represented as CONST_INT. */
4161 if (HOST_BITS_PER_WIDE_INT == 32)
4165 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4166 return trunc_int_for_mode (val, SImode) == val;
4170 /* For certain code models, the symbolic references are known to fit.
4171 in CM_SMALL_PIC model we know it fits if it is local to the shared
4172 library. Don't count TLS SYMBOL_REFs here, since they should fit
4173 only if inside of UNSPEC handled below. */
4175 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4177 /* For certain code models, the code is near as well. */
4179 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4180 || ix86_cmodel == CM_KERNEL);
4182 /* We also may accept the offsetted memory references in certain special
4185 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4186 switch (XINT (XEXP (value, 0), 1))
4188 case UNSPEC_GOTPCREL:
4190 case UNSPEC_GOTNTPOFF:
4196 if (GET_CODE (XEXP (value, 0)) == PLUS)
4198 rtx op1 = XEXP (XEXP (value, 0), 0);
4199 rtx op2 = XEXP (XEXP (value, 0), 1);
4200 HOST_WIDE_INT offset;
4202 if (ix86_cmodel == CM_LARGE)
4204 if (GET_CODE (op2) != CONST_INT)
4206 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4207 switch (GET_CODE (op1))
4210 /* For CM_SMALL assume that latest object is 16MB before
4211 end of 31bits boundary. We may also accept pretty
4212 large negative constants knowing that all objects are
4213 in the positive half of address space. */
4214 if (ix86_cmodel == CM_SMALL
4215 && offset < 16*1024*1024
4216 && trunc_int_for_mode (offset, SImode) == offset)
4218 /* For CM_KERNEL we know that all object resist in the
4219 negative half of 32bits address space. We may not
4220 accept negative offsets, since they may be just off
4221 and we may accept pretty large positive ones. */
4222 if (ix86_cmodel == CM_KERNEL
4224 && trunc_int_for_mode (offset, SImode) == offset)
4228 /* These conditions are similar to SYMBOL_REF ones, just the
4229 constraints for code models differ. */
4230 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4231 && offset < 16*1024*1024
4232 && trunc_int_for_mode (offset, SImode) == offset)
4234 if (ix86_cmodel == CM_KERNEL
4236 && trunc_int_for_mode (offset, SImode) == offset)
4240 switch (XINT (op1, 1))
4245 && trunc_int_for_mode (offset, SImode) == offset)
4259 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4261 x86_64_zero_extended_value (value)
4264 switch (GET_CODE (value))
4267 if (HOST_BITS_PER_WIDE_INT == 32)
4268 return (GET_MODE (value) == VOIDmode
4269 && !CONST_DOUBLE_HIGH (value));
4273 if (HOST_BITS_PER_WIDE_INT == 32)
4274 return INTVAL (value) >= 0;
4276 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4279 /* For certain code models, the symbolic references are known to fit. */
4281 return ix86_cmodel == CM_SMALL;
4283 /* For certain code models, the code is near as well. */
4285 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4287 /* We also may accept the offsetted memory references in certain special
4290 if (GET_CODE (XEXP (value, 0)) == PLUS)
4292 rtx op1 = XEXP (XEXP (value, 0), 0);
4293 rtx op2 = XEXP (XEXP (value, 0), 1);
4295 if (ix86_cmodel == CM_LARGE)
4297 switch (GET_CODE (op1))
4301 /* For small code model we may accept pretty large positive
4302 offsets, since one bit is available for free. Negative
4303 offsets are limited by the size of NULL pointer area
4304 specified by the ABI. */
4305 if (ix86_cmodel == CM_SMALL
4306 && GET_CODE (op2) == CONST_INT
4307 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4308 && (trunc_int_for_mode (INTVAL (op2), SImode)
4311 /* ??? For the kernel, we may accept adjustment of
4312 -0x10000000, since we know that it will just convert
4313 negative address space to positive, but perhaps this
4314 is not worthwhile. */
4317 /* These conditions are similar to SYMBOL_REF ones, just the
4318 constraints for code models differ. */
4319 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4320 && GET_CODE (op2) == CONST_INT
4321 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4322 && (trunc_int_for_mode (INTVAL (op2), SImode)
4336 /* Value should be nonzero if functions must have frame pointers.
4337 Zero means the frame pointer need not be set up (and parms may
4338 be accessed via the stack pointer) in functions that seem suitable. */
4341 ix86_frame_pointer_required ()
4343 /* If we accessed previous frames, then the generated code expects
4344 to be able to access the saved ebp value in our frame. */
4345 if (cfun->machine->accesses_prev_frame)
4348 /* Several x86 os'es need a frame pointer for other reasons,
4349 usually pertaining to setjmp. */
4350 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4353 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4354 the frame pointer by default. Turn it back on now if we've not
4355 got a leaf function. */
4356 if (TARGET_OMIT_LEAF_FRAME_POINTER
4357 && (!current_function_is_leaf))
4360 if (current_function_profile)
4366 /* Record that the current function accesses previous call frames. */
4369 ix86_setup_frame_addresses ()
4371 cfun->machine->accesses_prev_frame = 1;
4374 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4375 # define USE_HIDDEN_LINKONCE 1
4377 # define USE_HIDDEN_LINKONCE 0
4380 static int pic_labels_used;
4382 /* Fills in the label name that should be used for a pc thunk for
4383 the given register. */
4386 get_pc_thunk_name (name, regno)
4390 if (USE_HIDDEN_LINKONCE)
4391 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4393 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4397 /* This function generates code for -fpic that loads %ebx with
4398 the return address of the caller and then returns. */
4401 ix86_asm_file_end (file)
4407 for (regno = 0; regno < 8; ++regno)
4411 if (! ((pic_labels_used >> regno) & 1))
4414 get_pc_thunk_name (name, regno);
4416 if (USE_HIDDEN_LINKONCE)
4420 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4422 TREE_PUBLIC (decl) = 1;
4423 TREE_STATIC (decl) = 1;
4424 DECL_ONE_ONLY (decl) = 1;
4426 (*targetm.asm_out.unique_section) (decl, 0);
4427 named_section (decl, NULL, 0);
4429 (*targetm.asm_out.globalize_label) (file, name);
4430 fputs ("\t.hidden\t", file);
4431 assemble_name (file, name);
4433 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4438 ASM_OUTPUT_LABEL (file, name);
4441 xops[0] = gen_rtx_REG (SImode, regno);
4442 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4443 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4444 output_asm_insn ("ret", xops);
4448 /* Emit code for the SET_GOT patterns. */
4451 output_set_got (dest)
4457 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4459 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4461 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4464 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4466 output_asm_insn ("call\t%a2", xops);
4469 /* Output the "canonical" label name ("Lxx$pb") here too. This
4470 is what will be referred to by the Mach-O PIC subsystem. */
4471 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4473 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4474 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4477 output_asm_insn ("pop{l}\t%0", xops);
4482 get_pc_thunk_name (name, REGNO (dest));
4483 pic_labels_used |= 1 << REGNO (dest);
4485 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4486 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4487 output_asm_insn ("call\t%X2", xops);
4490 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4491 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4492 else if (!TARGET_MACHO)
4493 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4498 /* Generate an "push" pattern for input ARG. */
4504 return gen_rtx_SET (VOIDmode,
4506 gen_rtx_PRE_DEC (Pmode,
4507 stack_pointer_rtx)),
4511 /* Return >= 0 if there is an unused call-clobbered register available
4512 for the entire function. */
4515 ix86_select_alt_pic_regnum ()
4517 if (current_function_is_leaf && !current_function_profile)
4520 for (i = 2; i >= 0; --i)
4521 if (!regs_ever_live[i])
4525 return INVALID_REGNUM;
4528 /* Return 1 if we need to save REGNO. */
4530 ix86_save_reg (regno, maybe_eh_return)
4532 int maybe_eh_return;
4534 if (pic_offset_table_rtx
4535 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4536 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4537 || current_function_profile
4538 || current_function_calls_eh_return))
4540 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4545 if (current_function_calls_eh_return && maybe_eh_return)
4550 unsigned test = EH_RETURN_DATA_REGNO (i);
4551 if (test == INVALID_REGNUM)
4558 return (regs_ever_live[regno]
4559 && !call_used_regs[regno]
4560 && !fixed_regs[regno]
4561 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4564 /* Return number of registers to be saved on the stack. */
4572 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4573 if (ix86_save_reg (regno, true))
4578 /* Return the offset between two registers, one to be eliminated, and the other
4579 its replacement, at the start of a routine. */
4582 ix86_initial_elimination_offset (from, to)
4586 struct ix86_frame frame;
4587 ix86_compute_frame_layout (&frame);
4589 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4590 return frame.hard_frame_pointer_offset;
4591 else if (from == FRAME_POINTER_REGNUM
4592 && to == HARD_FRAME_POINTER_REGNUM)
4593 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4596 if (to != STACK_POINTER_REGNUM)
4598 else if (from == ARG_POINTER_REGNUM)
4599 return frame.stack_pointer_offset;
4600 else if (from != FRAME_POINTER_REGNUM)
4603 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4607 /* Fill structure ix86_frame about frame of currently computed function. */
4610 ix86_compute_frame_layout (frame)
4611 struct ix86_frame *frame;
4613 HOST_WIDE_INT total_size;
4614 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4616 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4617 HOST_WIDE_INT size = get_frame_size ();
4619 frame->nregs = ix86_nsaved_regs ();
4622 /* Skip return address and saved base pointer. */
4623 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4625 frame->hard_frame_pointer_offset = offset;
4627 /* Do some sanity checking of stack_alignment_needed and
4628 preferred_alignment, since i386 port is the only using those features
4629 that may break easily. */
4631 if (size && !stack_alignment_needed)
4633 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4635 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4637 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4640 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4641 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4643 /* Register save area */
4644 offset += frame->nregs * UNITS_PER_WORD;
4647 if (ix86_save_varrargs_registers)
4649 offset += X86_64_VARARGS_SIZE;
4650 frame->va_arg_size = X86_64_VARARGS_SIZE;
4653 frame->va_arg_size = 0;
4655 /* Align start of frame for local function. */
4656 frame->padding1 = ((offset + stack_alignment_needed - 1)
4657 & -stack_alignment_needed) - offset;
4659 offset += frame->padding1;
4661 /* Frame pointer points here. */
4662 frame->frame_pointer_offset = offset;
4666 /* Add outgoing arguments area. Can be skipped if we eliminated
4667 all the function calls as dead code. */
4668 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4670 offset += current_function_outgoing_args_size;
4671 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4674 frame->outgoing_arguments_size = 0;
4676 /* Align stack boundary. Only needed if we're calling another function
4678 if (!current_function_is_leaf || current_function_calls_alloca)
4679 frame->padding2 = ((offset + preferred_alignment - 1)
4680 & -preferred_alignment) - offset;
4682 frame->padding2 = 0;
4684 offset += frame->padding2;
4686 /* We've reached end of stack frame. */
4687 frame->stack_pointer_offset = offset;
4689 /* Size prologue needs to allocate. */
4690 frame->to_allocate =
4691 (size + frame->padding1 + frame->padding2
4692 + frame->outgoing_arguments_size + frame->va_arg_size);
4694 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4695 && current_function_is_leaf)
4697 frame->red_zone_size = frame->to_allocate;
4698 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4699 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4702 frame->red_zone_size = 0;
4703 frame->to_allocate -= frame->red_zone_size;
4704 frame->stack_pointer_offset -= frame->red_zone_size;
4706 fprintf (stderr, "nregs: %i\n", frame->nregs);
4707 fprintf (stderr, "size: %i\n", size);
4708 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4709 fprintf (stderr, "padding1: %i\n", frame->padding1);
4710 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4711 fprintf (stderr, "padding2: %i\n", frame->padding2);
4712 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4713 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4714 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4715 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4716 frame->hard_frame_pointer_offset);
4717 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4721 /* Emit code to save registers in the prologue. */
4724 ix86_emit_save_regs ()
4729 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4730 if (ix86_save_reg (regno, true))
4732 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4733 RTX_FRAME_RELATED_P (insn) = 1;
4737 /* Emit code to save registers using MOV insns. First register
4738 is restored from POINTER + OFFSET. */
4740 ix86_emit_save_regs_using_mov (pointer, offset)
4742 HOST_WIDE_INT offset;
4747 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4748 if (ix86_save_reg (regno, true))
4750 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4752 gen_rtx_REG (Pmode, regno));
4753 RTX_FRAME_RELATED_P (insn) = 1;
4754 offset += UNITS_PER_WORD;
4758 /* Expand the prologue into a bunch of separate insns. */
4761 ix86_expand_prologue ()
4765 struct ix86_frame frame;
4767 HOST_WIDE_INT allocate;
4769 ix86_compute_frame_layout (&frame);
4772 int count = frame.nregs;
4774 /* The fast prologue uses move instead of push to save registers. This
4775 is significantly longer, but also executes faster as modern hardware
4776 can execute the moves in parallel, but can't do that for push/pop.
4778 Be careful about choosing what prologue to emit: When function takes
4779 many instructions to execute we may use slow version as well as in
4780 case function is known to be outside hot spot (this is known with
4781 feedback only). Weight the size of function by number of registers
4782 to save as it is cheap to use one or two push instructions but very
4783 slow to use many of them. */
4785 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4786 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4787 || (flag_branch_probabilities
4788 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4789 use_fast_prologue_epilogue = 0;
4791 use_fast_prologue_epilogue = !expensive_function_p (count);
4792 if (TARGET_PROLOGUE_USING_MOVE)
4793 use_mov = use_fast_prologue_epilogue;
4796 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4797 slower on all targets. Also sdb doesn't like it. */
4799 if (frame_pointer_needed)
4801 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4802 RTX_FRAME_RELATED_P (insn) = 1;
4804 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4805 RTX_FRAME_RELATED_P (insn) = 1;
4808 allocate = frame.to_allocate;
4809 /* In case we are dealing only with single register and empty frame,
4810 push is equivalent of the mov+add sequence. */
4811 if (allocate == 0 && frame.nregs <= 1)
4815 ix86_emit_save_regs ();
4817 allocate += frame.nregs * UNITS_PER_WORD;
4821 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4823 insn = emit_insn (gen_pro_epilogue_adjust_stack
4824 (stack_pointer_rtx, stack_pointer_rtx,
4825 GEN_INT (-allocate)));
4826 RTX_FRAME_RELATED_P (insn) = 1;
4830 /* ??? Is this only valid for Win32? */
4837 arg0 = gen_rtx_REG (SImode, 0);
4838 emit_move_insn (arg0, GEN_INT (allocate));
4840 sym = gen_rtx_MEM (FUNCTION_MODE,
4841 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4842 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4844 CALL_INSN_FUNCTION_USAGE (insn)
4845 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4846 CALL_INSN_FUNCTION_USAGE (insn));
4848 /* Don't allow scheduling pass to move insns across __alloca
4850 emit_insn (gen_blockage (const0_rtx));
4854 if (!frame_pointer_needed || !frame.to_allocate)
4855 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4857 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4858 -frame.nregs * UNITS_PER_WORD);
4861 #ifdef SUBTARGET_PROLOGUE
4865 pic_reg_used = false;
4866 if (pic_offset_table_rtx
4867 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4868 || current_function_profile))
4870 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4872 if (alt_pic_reg_used != INVALID_REGNUM)
4873 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4875 pic_reg_used = true;
4880 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4882 /* Even with accurate pre-reload life analysis, we can wind up
4883 deleting all references to the pic register after reload.
4884 Consider if cross-jumping unifies two sides of a branch
4885 controlled by a comparison vs the only read from a global.
4886 In which case, allow the set_got to be deleted, though we're
4887 too late to do anything about the ebx save in the prologue. */
4888 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4891 /* Prevent function calls from be scheduled before the call to mcount.
4892 In the pic_reg_used case, make sure that the got load isn't deleted. */
4893 if (current_function_profile)
4894 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4897 /* Emit code to restore saved registers using MOV insns. First register
4898 is restored from POINTER + OFFSET. */
4900 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4903 int maybe_eh_return;
4907 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4908 if (ix86_save_reg (regno, maybe_eh_return))
4910 emit_move_insn (gen_rtx_REG (Pmode, regno),
4911 adjust_address (gen_rtx_MEM (Pmode, pointer),
4913 offset += UNITS_PER_WORD;
4917 /* Restore function stack, frame, and registers. */
4920 ix86_expand_epilogue (style)
4924 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4925 struct ix86_frame frame;
4926 HOST_WIDE_INT offset;
4928 ix86_compute_frame_layout (&frame);
4930 /* Calculate start of saved registers relative to ebp. Special care
4931 must be taken for the normal return case of a function using
4932 eh_return: the eax and edx registers are marked as saved, but not
4933 restored along this path. */
4934 offset = frame.nregs;
4935 if (current_function_calls_eh_return && style != 2)
4937 offset *= -UNITS_PER_WORD;
4939 /* If we're only restoring one register and sp is not valid then
4940 using a move instruction to restore the register since it's
4941 less work than reloading sp and popping the register.
4943 The default code result in stack adjustment using add/lea instruction,
4944 while this code results in LEAVE instruction (or discrete equivalent),
4945 so it is profitable in some other cases as well. Especially when there
4946 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4947 and there is exactly one register to pop. This heuristic may need some
4948 tuning in future. */
4949 if ((!sp_valid && frame.nregs <= 1)
4950 || (TARGET_EPILOGUE_USING_MOVE
4951 && use_fast_prologue_epilogue
4952 && (frame.nregs > 1 || frame.to_allocate))
4953 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4954 || (frame_pointer_needed && TARGET_USE_LEAVE
4955 && use_fast_prologue_epilogue && frame.nregs == 1)
4956 || current_function_calls_eh_return)
4958 /* Restore registers. We can use ebp or esp to address the memory
4959 locations. If both are available, default to ebp, since offsets
4960 are known to be small. Only exception is esp pointing directly to the
4961 end of block of saved registers, where we may simplify addressing
4964 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4965 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4966 frame.to_allocate, style == 2);
4968 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4969 offset, style == 2);
4971 /* eh_return epilogues need %ecx added to the stack pointer. */
4974 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4976 if (frame_pointer_needed)
4978 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4979 tmp = plus_constant (tmp, UNITS_PER_WORD);
4980 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4982 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4983 emit_move_insn (hard_frame_pointer_rtx, tmp);
4985 emit_insn (gen_pro_epilogue_adjust_stack
4986 (stack_pointer_rtx, sa, const0_rtx));
4990 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4991 tmp = plus_constant (tmp, (frame.to_allocate
4992 + frame.nregs * UNITS_PER_WORD));
4993 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4996 else if (!frame_pointer_needed)
4997 emit_insn (gen_pro_epilogue_adjust_stack
4998 (stack_pointer_rtx, stack_pointer_rtx,
4999 GEN_INT (frame.to_allocate
5000 + frame.nregs * UNITS_PER_WORD)));
5001 /* If not an i386, mov & pop is faster than "leave". */
5002 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5003 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5006 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5007 hard_frame_pointer_rtx,
5010 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5012 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5017 /* First step is to deallocate the stack frame so that we can
5018 pop the registers. */
5021 if (!frame_pointer_needed)
5023 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5024 hard_frame_pointer_rtx,
5027 else if (frame.to_allocate)
5028 emit_insn (gen_pro_epilogue_adjust_stack
5029 (stack_pointer_rtx, stack_pointer_rtx,
5030 GEN_INT (frame.to_allocate)));
5032 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5033 if (ix86_save_reg (regno, false))
5036 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5038 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5040 if (frame_pointer_needed)
5042 /* Leave results in shorter dependency chains on CPUs that are
5043 able to grok it fast. */
5044 if (TARGET_USE_LEAVE)
5045 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5046 else if (TARGET_64BIT)
5047 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5049 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5053 /* Sibcall epilogues don't want a return instruction. */
5057 if (current_function_pops_args && current_function_args_size)
5059 rtx popc = GEN_INT (current_function_pops_args);
5061 /* i386 can only pop 64K bytes. If asked to pop more, pop
5062 return address, do explicit add, and jump indirectly to the
5065 if (current_function_pops_args >= 65536)
5067 rtx ecx = gen_rtx_REG (SImode, 2);
5069 /* There are is no "pascal" calling convention in 64bit ABI. */
5073 emit_insn (gen_popsi1 (ecx));
5074 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5075 emit_jump_insn (gen_return_indirect_internal (ecx));
5078 emit_jump_insn (gen_return_pop_internal (popc));
5081 emit_jump_insn (gen_return_internal ());
5084 /* Reset from the function's potential modifications. */
5087 ix86_output_function_epilogue (file, size)
5088 FILE *file ATTRIBUTE_UNUSED;
5089 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5091 if (pic_offset_table_rtx)
5092 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5095 /* Extract the parts of an RTL expression that is a valid memory address
5096 for an instruction. Return 0 if the structure of the address is
5097 grossly off. Return -1 if the address contains ASHIFT, so it is not
5098 strictly valid, but still used for computing length of lea instruction.
5102 ix86_decompose_address (addr, out)
5104 struct ix86_address *out;
5106 rtx base = NULL_RTX;
5107 rtx index = NULL_RTX;
5108 rtx disp = NULL_RTX;
5109 HOST_WIDE_INT scale = 1;
5110 rtx scale_rtx = NULL_RTX;
5113 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5115 else if (GET_CODE (addr) == PLUS)
5117 rtx op0 = XEXP (addr, 0);
5118 rtx op1 = XEXP (addr, 1);
5119 enum rtx_code code0 = GET_CODE (op0);
5120 enum rtx_code code1 = GET_CODE (op1);
5122 if (code0 == REG || code0 == SUBREG)
5124 if (code1 == REG || code1 == SUBREG)
5125 index = op0, base = op1; /* index + base */
5127 base = op0, disp = op1; /* base + displacement */
5129 else if (code0 == MULT)
5131 index = XEXP (op0, 0);
5132 scale_rtx = XEXP (op0, 1);
5133 if (code1 == REG || code1 == SUBREG)
5134 base = op1; /* index*scale + base */
5136 disp = op1; /* index*scale + disp */
5138 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5140 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5141 scale_rtx = XEXP (XEXP (op0, 0), 1);
5142 base = XEXP (op0, 1);
5145 else if (code0 == PLUS)
5147 index = XEXP (op0, 0); /* index + base + disp */
5148 base = XEXP (op0, 1);
5154 else if (GET_CODE (addr) == MULT)
5156 index = XEXP (addr, 0); /* index*scale */
5157 scale_rtx = XEXP (addr, 1);
5159 else if (GET_CODE (addr) == ASHIFT)
5163 /* We're called for lea too, which implements ashift on occasion. */
5164 index = XEXP (addr, 0);
5165 tmp = XEXP (addr, 1);
5166 if (GET_CODE (tmp) != CONST_INT)
5168 scale = INTVAL (tmp);
5169 if ((unsigned HOST_WIDE_INT) scale > 3)
5175 disp = addr; /* displacement */
5177 /* Extract the integral value of scale. */
5180 if (GET_CODE (scale_rtx) != CONST_INT)
5182 scale = INTVAL (scale_rtx);
5185 /* Allow arg pointer and stack pointer as index if there is not scaling */
5186 if (base && index && scale == 1
5187 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5188 || index == stack_pointer_rtx))
5195 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5196 if ((base == hard_frame_pointer_rtx
5197 || base == frame_pointer_rtx
5198 || base == arg_pointer_rtx) && !disp)
5201 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5202 Avoid this by transforming to [%esi+0]. */
5203 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5204 && base && !index && !disp
5206 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5209 /* Special case: encode reg+reg instead of reg*2. */
5210 if (!base && index && scale && scale == 2)
5211 base = index, scale = 1;
5213 /* Special case: scaling cannot be encoded without base or displacement. */
5214 if (!base && !disp && index && scale != 1)
5225 /* Return cost of the memory address x.
5226 For i386, it is better to use a complex address than let gcc copy
5227 the address into a reg and make a new pseudo. But not if the address
5228 requires to two regs - that would mean more pseudos with longer
5231 ix86_address_cost (x)
5234 struct ix86_address parts;
5237 if (!ix86_decompose_address (x, &parts))
5240 if (parts.base && GET_CODE (parts.base) == SUBREG)
5241 parts.base = SUBREG_REG (parts.base);
5242 if (parts.index && GET_CODE (parts.index) == SUBREG)
5243 parts.index = SUBREG_REG (parts.index);
5245 /* More complex memory references are better. */
5246 if (parts.disp && parts.disp != const0_rtx)
5249 /* Attempt to minimize number of registers in the address. */
5251 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5253 && (!REG_P (parts.index)
5254 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5258 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5260 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5261 && parts.base != parts.index)
5264 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5265 since it's predecode logic can't detect the length of instructions
5266 and it degenerates to vector decoded. Increase cost of such
5267 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5268 to split such addresses or even refuse such addresses at all.
5270 Following addressing modes are affected:
5275 The first and last case may be avoidable by explicitly coding the zero in
5276 memory address, but I don't have AMD-K6 machine handy to check this
5280 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5281 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5282 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5288 /* If X is a machine specific address (i.e. a symbol or label being
5289 referenced as a displacement from the GOT implemented using an
5290 UNSPEC), then return the base term. Otherwise return X. */
5293 ix86_find_base_term (x)
5300 if (GET_CODE (x) != CONST)
5303 if (GET_CODE (term) == PLUS
5304 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5305 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5306 term = XEXP (term, 0);
5307 if (GET_CODE (term) != UNSPEC
5308 || XINT (term, 1) != UNSPEC_GOTPCREL)
5311 term = XVECEXP (term, 0, 0);
5313 if (GET_CODE (term) != SYMBOL_REF
5314 && GET_CODE (term) != LABEL_REF)
5320 if (GET_CODE (x) != PLUS
5321 || XEXP (x, 0) != pic_offset_table_rtx
5322 || GET_CODE (XEXP (x, 1)) != CONST)
5325 term = XEXP (XEXP (x, 1), 0);
5327 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5328 term = XEXP (term, 0);
5330 if (GET_CODE (term) != UNSPEC
5331 || XINT (term, 1) != UNSPEC_GOTOFF)
5334 term = XVECEXP (term, 0, 0);
5336 if (GET_CODE (term) != SYMBOL_REF
5337 && GET_CODE (term) != LABEL_REF)
5343 /* Determine if a given RTX is a valid constant. We already know this
5344 satisfies CONSTANT_P. */
5347 legitimate_constant_p (x)
5352 switch (GET_CODE (x))
5355 /* TLS symbols are not constant. */
5356 if (tls_symbolic_operand (x, Pmode))
5361 inner = XEXP (x, 0);
5363 /* Offsets of TLS symbols are never valid.
5364 Discourage CSE from creating them. */
5365 if (GET_CODE (inner) == PLUS
5366 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5369 /* Only some unspecs are valid as "constants". */
5370 if (GET_CODE (inner) == UNSPEC)
5371 switch (XINT (inner, 1))
5374 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5384 /* Otherwise we handle everything else in the move patterns. */
5388 /* Determine if it's legal to put X into the constant pool. This
5389 is not possible for the address of thread-local symbols, which
5390 is checked above. */
5393 ix86_cannot_force_const_mem (x)
5396 return !legitimate_constant_p (x);
5399 /* Determine if a given RTX is a valid constant address. */
5402 constant_address_p (x)
5405 switch (GET_CODE (x))
5412 return TARGET_64BIT;
5415 /* For Mach-O, really believe the CONST. */
5418 /* Otherwise fall through. */
5420 return !flag_pic && legitimate_constant_p (x);
5427 /* Nonzero if the constant value X is a legitimate general operand
5428 when generating PIC code. It is given that flag_pic is on and
5429 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5432 legitimate_pic_operand_p (x)
5437 switch (GET_CODE (x))
5440 inner = XEXP (x, 0);
5442 /* Only some unspecs are valid as "constants". */
5443 if (GET_CODE (inner) == UNSPEC)
5444 switch (XINT (inner, 1))
5447 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5455 return legitimate_pic_address_disp_p (x);
5462 /* Determine if a given CONST RTX is a valid memory displacement
5466 legitimate_pic_address_disp_p (disp)
5471 /* In 64bit mode we can allow direct addresses of symbols and labels
5472 when they are not dynamic symbols. */
5475 /* TLS references should always be enclosed in UNSPEC. */
5476 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5478 if (GET_CODE (disp) == SYMBOL_REF
5479 && ix86_cmodel == CM_SMALL_PIC
5480 && (CONSTANT_POOL_ADDRESS_P (disp)
5481 || SYMBOL_REF_FLAG (disp)))
5483 if (GET_CODE (disp) == LABEL_REF)
5485 if (GET_CODE (disp) == CONST
5486 && GET_CODE (XEXP (disp, 0)) == PLUS
5487 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5488 && ix86_cmodel == CM_SMALL_PIC
5489 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5490 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5491 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5492 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5493 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5494 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5497 if (GET_CODE (disp) != CONST)
5499 disp = XEXP (disp, 0);
5503 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5504 of GOT tables. We should not need these anyway. */
5505 if (GET_CODE (disp) != UNSPEC
5506 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5509 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5510 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5516 if (GET_CODE (disp) == PLUS)
5518 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5520 disp = XEXP (disp, 0);
5524 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5525 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5527 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5528 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5529 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5531 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5532 if (strstr (sym_name, "$pb") != 0)
5537 if (GET_CODE (disp) != UNSPEC)
5540 switch (XINT (disp, 1))
5545 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5547 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5548 case UNSPEC_GOTTPOFF:
5549 case UNSPEC_GOTNTPOFF:
5550 case UNSPEC_INDNTPOFF:
5553 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5555 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5557 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5563 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5564 memory address for an instruction. The MODE argument is the machine mode
5565 for the MEM expression that wants to use this address.
5567 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5568 convert common non-canonical forms to canonical form so that they will
5572 legitimate_address_p (mode, addr, strict)
5573 enum machine_mode mode;
5577 struct ix86_address parts;
5578 rtx base, index, disp;
5579 HOST_WIDE_INT scale;
5580 const char *reason = NULL;
5581 rtx reason_rtx = NULL_RTX;
5583 if (TARGET_DEBUG_ADDR)
5586 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5587 GET_MODE_NAME (mode), strict);
5591 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5593 if (TARGET_DEBUG_ADDR)
5594 fprintf (stderr, "Success.\n");
5598 if (ix86_decompose_address (addr, &parts) <= 0)
5600 reason = "decomposition failed";
5605 index = parts.index;
5607 scale = parts.scale;
5609 /* Validate base register.
5611 Don't allow SUBREG's here, it can lead to spill failures when the base
5612 is one word out of a two word structure, which is represented internally
5620 if (GET_CODE (base) == SUBREG)
5621 reg = SUBREG_REG (base);
5625 if (GET_CODE (reg) != REG)
5627 reason = "base is not a register";
5631 if (GET_MODE (base) != Pmode)
5633 reason = "base is not in Pmode";
5637 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5638 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5640 reason = "base is not valid";
5645 /* Validate index register.
5647 Don't allow SUBREG's here, it can lead to spill failures when the index
5648 is one word out of a two word structure, which is represented internally
5656 if (GET_CODE (index) == SUBREG)
5657 reg = SUBREG_REG (index);
5661 if (GET_CODE (reg) != REG)
5663 reason = "index is not a register";
5667 if (GET_MODE (index) != Pmode)
5669 reason = "index is not in Pmode";
5673 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5674 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5676 reason = "index is not valid";
5681 /* Validate scale factor. */
5684 reason_rtx = GEN_INT (scale);
5687 reason = "scale without index";
5691 if (scale != 2 && scale != 4 && scale != 8)
5693 reason = "scale is not a valid multiplier";
5698 /* Validate displacement. */
5703 if (GET_CODE (disp) == CONST
5704 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5705 switch (XINT (XEXP (disp, 0), 1))
5709 case UNSPEC_GOTPCREL:
5712 goto is_legitimate_pic;
5714 case UNSPEC_GOTTPOFF:
5715 case UNSPEC_GOTNTPOFF:
5716 case UNSPEC_INDNTPOFF:
5722 reason = "invalid address unspec";
5726 else if (flag_pic && (SYMBOLIC_CONST (disp)
5728 && !machopic_operand_p (disp)
5733 if (TARGET_64BIT && (index || base))
5735 /* foo@dtpoff(%rX) is ok. */
5736 if (GET_CODE (disp) != CONST
5737 || GET_CODE (XEXP (disp, 0)) != PLUS
5738 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5739 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5740 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5741 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5743 reason = "non-constant pic memory reference";
5747 else if (! legitimate_pic_address_disp_p (disp))
5749 reason = "displacement is an invalid pic construct";
5753 /* This code used to verify that a symbolic pic displacement
5754 includes the pic_offset_table_rtx register.
5756 While this is good idea, unfortunately these constructs may
5757 be created by "adds using lea" optimization for incorrect
5766 This code is nonsensical, but results in addressing
5767 GOT table with pic_offset_table_rtx base. We can't
5768 just refuse it easily, since it gets matched by
5769 "addsi3" pattern, that later gets split to lea in the
5770 case output register differs from input. While this
5771 can be handled by separate addsi pattern for this case
5772 that never results in lea, this seems to be easier and
5773 correct fix for crash to disable this test. */
5775 else if (!CONSTANT_ADDRESS_P (disp))
5777 reason = "displacement is not constant";
5780 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5782 reason = "displacement is out of range";
5785 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5787 reason = "displacement is a const_double";
5792 /* Everything looks valid. */
5793 if (TARGET_DEBUG_ADDR)
5794 fprintf (stderr, "Success.\n");
5798 if (TARGET_DEBUG_ADDR)
5800 fprintf (stderr, "Error: %s\n", reason);
5801 debug_rtx (reason_rtx);
5806 /* Return an unique alias set for the GOT. */
5808 static HOST_WIDE_INT
5809 ix86_GOT_alias_set ()
5811 static HOST_WIDE_INT set = -1;
5813 set = new_alias_set ();
5817 /* Return a legitimate reference for ORIG (an address) using the
5818 register REG. If REG is 0, a new pseudo is generated.
5820 There are two types of references that must be handled:
5822 1. Global data references must load the address from the GOT, via
5823 the PIC reg. An insn is emitted to do this load, and the reg is
5826 2. Static data references, constant pool addresses, and code labels
5827 compute the address as an offset from the GOT, whose base is in
5828 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5829 differentiate them from global data objects. The returned
5830 address is the PIC reg + an unspec constant.
5832 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5833 reg also appears in the address. */
5836 legitimize_pic_address (orig, reg)
5846 reg = gen_reg_rtx (Pmode);
5847 /* Use the generic Mach-O PIC machinery. */
5848 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5851 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5853 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5855 /* This symbol may be referenced via a displacement from the PIC
5856 base address (@GOTOFF). */
5858 if (reload_in_progress)
5859 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5860 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5861 new = gen_rtx_CONST (Pmode, new);
5862 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5866 emit_move_insn (reg, new);
5870 else if (GET_CODE (addr) == SYMBOL_REF)
5874 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5875 new = gen_rtx_CONST (Pmode, new);
5876 new = gen_rtx_MEM (Pmode, new);
5877 RTX_UNCHANGING_P (new) = 1;
5878 set_mem_alias_set (new, ix86_GOT_alias_set ());
5881 reg = gen_reg_rtx (Pmode);
5882 /* Use directly gen_movsi, otherwise the address is loaded
5883 into register for CSE. We don't want to CSE this addresses,
5884 instead we CSE addresses from the GOT table, so skip this. */
5885 emit_insn (gen_movsi (reg, new));
5890 /* This symbol must be referenced via a load from the
5891 Global Offset Table (@GOT). */
5893 if (reload_in_progress)
5894 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5895 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5896 new = gen_rtx_CONST (Pmode, new);
5897 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5898 new = gen_rtx_MEM (Pmode, new);
5899 RTX_UNCHANGING_P (new) = 1;
5900 set_mem_alias_set (new, ix86_GOT_alias_set ());
5903 reg = gen_reg_rtx (Pmode);
5904 emit_move_insn (reg, new);
5910 if (GET_CODE (addr) == CONST)
5912 addr = XEXP (addr, 0);
5914 /* We must match stuff we generate before. Assume the only
5915 unspecs that can get here are ours. Not that we could do
5916 anything with them anyway... */
5917 if (GET_CODE (addr) == UNSPEC
5918 || (GET_CODE (addr) == PLUS
5919 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5921 if (GET_CODE (addr) != PLUS)
5924 if (GET_CODE (addr) == PLUS)
5926 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5928 /* Check first to see if this is a constant offset from a @GOTOFF
5929 symbol reference. */
5930 if (local_symbolic_operand (op0, Pmode)
5931 && GET_CODE (op1) == CONST_INT)
5935 if (reload_in_progress)
5936 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5937 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5939 new = gen_rtx_PLUS (Pmode, new, op1);
5940 new = gen_rtx_CONST (Pmode, new);
5941 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5945 emit_move_insn (reg, new);
5951 if (INTVAL (op1) < -16*1024*1024
5952 || INTVAL (op1) >= 16*1024*1024)
5953 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5958 base = legitimize_pic_address (XEXP (addr, 0), reg);
5959 new = legitimize_pic_address (XEXP (addr, 1),
5960 base == reg ? NULL_RTX : reg);
5962 if (GET_CODE (new) == CONST_INT)
5963 new = plus_constant (base, INTVAL (new));
5966 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5968 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5969 new = XEXP (new, 1);
5971 new = gen_rtx_PLUS (Pmode, base, new);
5980 ix86_encode_section_info (decl, first)
5982 int first ATTRIBUTE_UNUSED;
5984 bool local_p = (*targetm.binds_local_p) (decl);
5987 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5988 if (GET_CODE (rtl) != MEM)
5990 symbol = XEXP (rtl, 0);
5991 if (GET_CODE (symbol) != SYMBOL_REF)
5994 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5995 symbol so that we may access it directly in the GOT. */
5998 SYMBOL_REF_FLAG (symbol) = local_p;
6000 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6001 "local dynamic", "initial exec" or "local exec" TLS models
6004 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6006 const char *symbol_str;
6009 enum tls_model kind = decl_tls_model (decl);
6011 if (TARGET_64BIT && ! flag_pic)
6013 /* x86-64 doesn't allow non-pic code for shared libraries,
6014 so don't generate GD/LD TLS models for non-pic code. */
6017 case TLS_MODEL_GLOBAL_DYNAMIC:
6018 kind = TLS_MODEL_INITIAL_EXEC; break;
6019 case TLS_MODEL_LOCAL_DYNAMIC:
6020 kind = TLS_MODEL_LOCAL_EXEC; break;
6026 symbol_str = XSTR (symbol, 0);
6028 if (symbol_str[0] == '%')
6030 if (symbol_str[1] == tls_model_chars[kind])
6034 len = strlen (symbol_str) + 1;
6035 newstr = alloca (len + 2);
6038 newstr[1] = tls_model_chars[kind];
6039 memcpy (newstr + 2, symbol_str, len);
6041 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6045 /* Undo the above when printing symbol names. */
6048 ix86_strip_name_encoding (str)
6058 /* Load the thread pointer into a register. */
6061 get_thread_pointer ()
6065 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6066 tp = gen_rtx_MEM (Pmode, tp);
6067 RTX_UNCHANGING_P (tp) = 1;
6068 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6069 tp = force_reg (Pmode, tp);
6074 /* Try machine-dependent ways of modifying an illegitimate address
6075 to be legitimate. If we find one, return the new, valid address.
6076 This macro is used in only one place: `memory_address' in explow.c.
6078 OLDX is the address as it was before break_out_memory_refs was called.
6079 In some cases it is useful to look at this to decide what needs to be done.
6081 MODE and WIN are passed so that this macro can use
6082 GO_IF_LEGITIMATE_ADDRESS.
6084 It is always safe for this macro to do nothing. It exists to recognize
6085 opportunities to optimize the output.
6087 For the 80386, we handle X+REG by loading X into a register R and
6088 using R+REG. R will go in a general reg and indexing will be used.
6089 However, if REG is a broken-out memory address or multiplication,
6090 nothing needs to be done because REG can certainly go in a general reg.
6092 When -fpic is used, special handling is needed for symbolic references.
6093 See comments by legitimize_pic_address in i386.c for details. */
6096 legitimize_address (x, oldx, mode)
6098 register rtx oldx ATTRIBUTE_UNUSED;
6099 enum machine_mode mode;
6104 if (TARGET_DEBUG_ADDR)
6106 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6107 GET_MODE_NAME (mode));
6111 log = tls_symbolic_operand (x, mode);
6114 rtx dest, base, off, pic;
6119 case TLS_MODEL_GLOBAL_DYNAMIC:
6120 dest = gen_reg_rtx (Pmode);
6123 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6126 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6127 insns = get_insns ();
6130 emit_libcall_block (insns, dest, rax, x);
6133 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6136 case TLS_MODEL_LOCAL_DYNAMIC:
6137 base = gen_reg_rtx (Pmode);
6140 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6143 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6144 insns = get_insns ();
6147 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6148 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6149 emit_libcall_block (insns, base, rax, note);
6152 emit_insn (gen_tls_local_dynamic_base_32 (base));
6154 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6155 off = gen_rtx_CONST (Pmode, off);
6157 return gen_rtx_PLUS (Pmode, base, off);
6159 case TLS_MODEL_INITIAL_EXEC:
6163 type = UNSPEC_GOTNTPOFF;
6167 if (reload_in_progress)
6168 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6169 pic = pic_offset_table_rtx;
6170 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6172 else if (!TARGET_GNU_TLS)
6174 pic = gen_reg_rtx (Pmode);
6175 emit_insn (gen_set_got (pic));
6176 type = UNSPEC_GOTTPOFF;
6181 type = UNSPEC_INDNTPOFF;
6184 base = get_thread_pointer ();
6186 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6187 off = gen_rtx_CONST (Pmode, off);
6189 off = gen_rtx_PLUS (Pmode, pic, off);
6190 off = gen_rtx_MEM (Pmode, off);
6191 RTX_UNCHANGING_P (off) = 1;
6192 set_mem_alias_set (off, ix86_GOT_alias_set ());
6193 dest = gen_reg_rtx (Pmode);
6195 if (TARGET_64BIT || TARGET_GNU_TLS)
6197 emit_move_insn (dest, off);
6198 return gen_rtx_PLUS (Pmode, base, dest);
6201 emit_insn (gen_subsi3 (dest, base, off));
6204 case TLS_MODEL_LOCAL_EXEC:
6205 base = get_thread_pointer ();
6207 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6208 (TARGET_64BIT || TARGET_GNU_TLS)
6209 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6210 off = gen_rtx_CONST (Pmode, off);
6212 if (TARGET_64BIT || TARGET_GNU_TLS)
6213 return gen_rtx_PLUS (Pmode, base, off);
6216 dest = gen_reg_rtx (Pmode);
6217 emit_insn (gen_subsi3 (dest, base, off));
6228 if (flag_pic && SYMBOLIC_CONST (x))
6229 return legitimize_pic_address (x, 0);
6231 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6232 if (GET_CODE (x) == ASHIFT
6233 && GET_CODE (XEXP (x, 1)) == CONST_INT
6234 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6237 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6238 GEN_INT (1 << log));
6241 if (GET_CODE (x) == PLUS)
6243 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6245 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6246 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6247 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6250 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6251 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6252 GEN_INT (1 << log));
6255 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6256 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6257 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6260 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6261 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6262 GEN_INT (1 << log));
6265 /* Put multiply first if it isn't already. */
6266 if (GET_CODE (XEXP (x, 1)) == MULT)
6268 rtx tmp = XEXP (x, 0);
6269 XEXP (x, 0) = XEXP (x, 1);
6274 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6275 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6276 created by virtual register instantiation, register elimination, and
6277 similar optimizations. */
6278 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6281 x = gen_rtx_PLUS (Pmode,
6282 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6283 XEXP (XEXP (x, 1), 0)),
6284 XEXP (XEXP (x, 1), 1));
6288 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6289 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6290 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6291 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6292 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6293 && CONSTANT_P (XEXP (x, 1)))
6296 rtx other = NULL_RTX;
6298 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6300 constant = XEXP (x, 1);
6301 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6303 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6305 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6306 other = XEXP (x, 1);
6314 x = gen_rtx_PLUS (Pmode,
6315 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6316 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6317 plus_constant (other, INTVAL (constant)));
6321 if (changed && legitimate_address_p (mode, x, FALSE))
6324 if (GET_CODE (XEXP (x, 0)) == MULT)
6327 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6330 if (GET_CODE (XEXP (x, 1)) == MULT)
6333 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6337 && GET_CODE (XEXP (x, 1)) == REG
6338 && GET_CODE (XEXP (x, 0)) == REG)
6341 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6344 x = legitimize_pic_address (x, 0);
6347 if (changed && legitimate_address_p (mode, x, FALSE))
6350 if (GET_CODE (XEXP (x, 0)) == REG)
6352 register rtx temp = gen_reg_rtx (Pmode);
6353 register rtx val = force_operand (XEXP (x, 1), temp);
6355 emit_move_insn (temp, val);
6361 else if (GET_CODE (XEXP (x, 1)) == REG)
6363 register rtx temp = gen_reg_rtx (Pmode);
6364 register rtx val = force_operand (XEXP (x, 0), temp);
6366 emit_move_insn (temp, val);
6376 /* Print an integer constant expression in assembler syntax. Addition
6377 and subtraction are the only arithmetic that may appear in these
6378 expressions. FILE is the stdio stream to write to, X is the rtx, and
6379 CODE is the operand print code from the output string. */
6382 output_pic_addr_const (file, x, code)
6389 switch (GET_CODE (x))
6399 assemble_name (file, XSTR (x, 0));
6400 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6401 fputs ("@PLT", file);
6408 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6409 assemble_name (asm_out_file, buf);
6413 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6417 /* This used to output parentheses around the expression,
6418 but that does not work on the 386 (either ATT or BSD assembler). */
6419 output_pic_addr_const (file, XEXP (x, 0), code);
6423 if (GET_MODE (x) == VOIDmode)
6425 /* We can use %d if the number is <32 bits and positive. */
6426 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6427 fprintf (file, "0x%lx%08lx",
6428 (unsigned long) CONST_DOUBLE_HIGH (x),
6429 (unsigned long) CONST_DOUBLE_LOW (x));
6431 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6434 /* We can't handle floating point constants;
6435 PRINT_OPERAND must handle them. */
6436 output_operand_lossage ("floating constant misused");
6440 /* Some assemblers need integer constants to appear first. */
6441 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6443 output_pic_addr_const (file, XEXP (x, 0), code);
6445 output_pic_addr_const (file, XEXP (x, 1), code);
6447 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6449 output_pic_addr_const (file, XEXP (x, 1), code);
6451 output_pic_addr_const (file, XEXP (x, 0), code);
6459 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6460 output_pic_addr_const (file, XEXP (x, 0), code);
6462 output_pic_addr_const (file, XEXP (x, 1), code);
6464 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6468 if (XVECLEN (x, 0) != 1)
6470 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6471 switch (XINT (x, 1))
6474 fputs ("@GOT", file);
6477 fputs ("@GOTOFF", file);
6479 case UNSPEC_GOTPCREL:
6480 fputs ("@GOTPCREL(%rip)", file);
6482 case UNSPEC_GOTTPOFF:
6483 /* FIXME: This might be @TPOFF in Sun ld too. */
6484 fputs ("@GOTTPOFF", file);
6487 fputs ("@TPOFF", file);
6491 fputs ("@TPOFF", file);
6493 fputs ("@NTPOFF", file);
6496 fputs ("@DTPOFF", file);
6498 case UNSPEC_GOTNTPOFF:
6500 fputs ("@GOTTPOFF(%rip)", file);
6502 fputs ("@GOTNTPOFF", file);
6504 case UNSPEC_INDNTPOFF:
6505 fputs ("@INDNTPOFF", file);
6508 output_operand_lossage ("invalid UNSPEC as operand");
6514 output_operand_lossage ("invalid expression as operand");
6518 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6519 We need to handle our special PIC relocations. */
6522 i386_dwarf_output_addr_const (file, x)
6527 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6531 fprintf (file, "%s", ASM_LONG);
6534 output_pic_addr_const (file, x, '\0');
6536 output_addr_const (file, x);
6540 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6541 We need to emit DTP-relative relocations. */
6544 i386_output_dwarf_dtprel (file, size, x)
6549 fputs (ASM_LONG, file);
6550 output_addr_const (file, x);
6551 fputs ("@DTPOFF", file);
6557 fputs (", 0", file);
6564 /* In the name of slightly smaller debug output, and to cater to
6565 general assembler losage, recognize PIC+GOTOFF and turn it back
6566 into a direct symbol reference. */
6569 i386_simplify_dwarf_addr (orig_x)
6574 if (GET_CODE (x) == MEM)
6579 if (GET_CODE (x) != CONST
6580 || GET_CODE (XEXP (x, 0)) != UNSPEC
6581 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6582 || GET_CODE (orig_x) != MEM)
6584 return XVECEXP (XEXP (x, 0), 0, 0);
6587 if (GET_CODE (x) != PLUS
6588 || GET_CODE (XEXP (x, 1)) != CONST)
6591 if (GET_CODE (XEXP (x, 0)) == REG
6592 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6593 /* %ebx + GOT/GOTOFF */
6595 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6597 /* %ebx + %reg * scale + GOT/GOTOFF */
6599 if (GET_CODE (XEXP (y, 0)) == REG
6600 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6602 else if (GET_CODE (XEXP (y, 1)) == REG
6603 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6607 if (GET_CODE (y) != REG
6608 && GET_CODE (y) != MULT
6609 && GET_CODE (y) != ASHIFT)
6615 x = XEXP (XEXP (x, 1), 0);
6616 if (GET_CODE (x) == UNSPEC
6617 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6618 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6621 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6622 return XVECEXP (x, 0, 0);
6625 if (GET_CODE (x) == PLUS
6626 && GET_CODE (XEXP (x, 0)) == UNSPEC
6627 && GET_CODE (XEXP (x, 1)) == CONST_INT
6628 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6629 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6630 && GET_CODE (orig_x) != MEM)))
6632 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6634 return gen_rtx_PLUS (Pmode, y, x);
6642 put_condition_code (code, mode, reverse, fp, file)
6644 enum machine_mode mode;
6650 if (mode == CCFPmode || mode == CCFPUmode)
6652 enum rtx_code second_code, bypass_code;
6653 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6654 if (bypass_code != NIL || second_code != NIL)
6656 code = ix86_fp_compare_code_to_integer (code);
6660 code = reverse_condition (code);
6671 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6676 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6677 Those same assemblers have the same but opposite losage on cmov. */
6680 suffix = fp ? "nbe" : "a";
6683 if (mode == CCNOmode || mode == CCGOCmode)
6685 else if (mode == CCmode || mode == CCGCmode)
6696 if (mode == CCNOmode || mode == CCGOCmode)
6698 else if (mode == CCmode || mode == CCGCmode)
6707 suffix = fp ? "nb" : "ae";
6710 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6720 suffix = fp ? "u" : "p";
6723 suffix = fp ? "nu" : "np";
6728 fputs (suffix, file);
6732 print_reg (x, code, file)
6737 if (REGNO (x) == ARG_POINTER_REGNUM
6738 || REGNO (x) == FRAME_POINTER_REGNUM
6739 || REGNO (x) == FLAGS_REG
6740 || REGNO (x) == FPSR_REG)
6743 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6746 if (code == 'w' || MMX_REG_P (x))
6748 else if (code == 'b')
6750 else if (code == 'k')
6752 else if (code == 'q')
6754 else if (code == 'y')
6756 else if (code == 'h')
6759 code = GET_MODE_SIZE (GET_MODE (x));
6761 /* Irritatingly, AMD extended registers use different naming convention
6762 from the normal registers. */
6763 if (REX_INT_REG_P (x))
6770 error ("extended registers have no high halves");
6773 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6776 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6779 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6782 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6785 error ("unsupported operand size for extended register");
6793 if (STACK_TOP_P (x))
6795 fputs ("st(0)", file);
6802 if (! ANY_FP_REG_P (x))
6803 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6807 fputs (hi_reg_name[REGNO (x)], file);
6810 fputs (qi_reg_name[REGNO (x)], file);
6813 fputs (qi_high_reg_name[REGNO (x)], file);
6820 /* Locate some local-dynamic symbol still in use by this function
6821 so that we can print its name in some tls_local_dynamic_base
6825 get_some_local_dynamic_name ()
6829 if (cfun->machine->some_ld_name)
6830 return cfun->machine->some_ld_name;
6832 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6834 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6835 return cfun->machine->some_ld_name;
6841 get_some_local_dynamic_name_1 (px, data)
6843 void *data ATTRIBUTE_UNUSED;
6847 if (GET_CODE (x) == SYMBOL_REF
6848 && local_dynamic_symbolic_operand (x, Pmode))
6850 cfun->machine->some_ld_name = XSTR (x, 0);
6858 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6859 C -- print opcode suffix for set/cmov insn.
6860 c -- like C, but print reversed condition
6861 F,f -- likewise, but for floating-point.
6862 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6864 R -- print the prefix for register names.
6865 z -- print the opcode suffix for the size of the current operand.
6866 * -- print a star (in certain assembler syntax)
6867 A -- print an absolute memory reference.
6868 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6869 s -- print a shift double count, followed by the assemblers argument
6871 b -- print the QImode name of the register for the indicated operand.
6872 %b0 would print %al if operands[0] is reg 0.
6873 w -- likewise, print the HImode name of the register.
6874 k -- likewise, print the SImode name of the register.
6875 q -- likewise, print the DImode name of the register.
6876 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6877 y -- print "st(0)" instead of "st" as a register.
6878 D -- print condition for SSE cmp instruction.
6879 P -- if PIC, print an @PLT suffix.
6880 X -- don't print any sort of PIC '@' suffix for a symbol.
6881 & -- print some in-use local-dynamic symbol name.
6885 print_operand (file, x, code)
6895 if (ASSEMBLER_DIALECT == ASM_ATT)
6900 assemble_name (file, get_some_local_dynamic_name ());
6904 if (ASSEMBLER_DIALECT == ASM_ATT)
6906 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6908 /* Intel syntax. For absolute addresses, registers should not
6909 be surrounded by braces. */
6910 if (GET_CODE (x) != REG)
6913 PRINT_OPERAND (file, x, 0);
6921 PRINT_OPERAND (file, x, 0);
6926 if (ASSEMBLER_DIALECT == ASM_ATT)
6931 if (ASSEMBLER_DIALECT == ASM_ATT)
6936 if (ASSEMBLER_DIALECT == ASM_ATT)
6941 if (ASSEMBLER_DIALECT == ASM_ATT)
6946 if (ASSEMBLER_DIALECT == ASM_ATT)
6951 if (ASSEMBLER_DIALECT == ASM_ATT)
6956 /* 387 opcodes don't get size suffixes if the operands are
6958 if (STACK_REG_P (x))
6961 /* Likewise if using Intel opcodes. */
6962 if (ASSEMBLER_DIALECT == ASM_INTEL)
6965 /* This is the size of op from size of operand. */
6966 switch (GET_MODE_SIZE (GET_MODE (x)))
6969 #ifdef HAVE_GAS_FILDS_FISTS
6975 if (GET_MODE (x) == SFmode)
6990 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6992 #ifdef GAS_MNEMONICS
7018 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7020 PRINT_OPERAND (file, x, 0);
7026 /* Little bit of braindamage here. The SSE compare instructions
7027 does use completely different names for the comparisons that the
7028 fp conditional moves. */
7029 switch (GET_CODE (x))
7044 fputs ("unord", file);
7048 fputs ("neq", file);
7052 fputs ("nlt", file);
7056 fputs ("nle", file);
7059 fputs ("ord", file);
7067 #ifdef CMOV_SUN_AS_SYNTAX
7068 if (ASSEMBLER_DIALECT == ASM_ATT)
7070 switch (GET_MODE (x))
7072 case HImode: putc ('w', file); break;
7074 case SFmode: putc ('l', file); break;
7076 case DFmode: putc ('q', file); break;
7084 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7087 #ifdef CMOV_SUN_AS_SYNTAX
7088 if (ASSEMBLER_DIALECT == ASM_ATT)
7091 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7094 /* Like above, but reverse condition */
7096 /* Check to see if argument to %c is really a constant
7097 and not a condition code which needs to be reversed. */
7098 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7100 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7103 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7106 #ifdef CMOV_SUN_AS_SYNTAX
7107 if (ASSEMBLER_DIALECT == ASM_ATT)
7110 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7116 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7119 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7122 int pred_val = INTVAL (XEXP (x, 0));
7124 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7125 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7127 int taken = pred_val > REG_BR_PROB_BASE / 2;
7128 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7130 /* Emit hints only in the case default branch prediction
7131 heuristics would fail. */
7132 if (taken != cputaken)
7134 /* We use 3e (DS) prefix for taken branches and
7135 2e (CS) prefix for not taken branches. */
7137 fputs ("ds ; ", file);
7139 fputs ("cs ; ", file);
7146 output_operand_lossage ("invalid operand code `%c'", code);
7150 if (GET_CODE (x) == REG)
7152 PRINT_REG (x, code, file);
7155 else if (GET_CODE (x) == MEM)
7157 /* No `byte ptr' prefix for call instructions. */
7158 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7161 switch (GET_MODE_SIZE (GET_MODE (x)))
7163 case 1: size = "BYTE"; break;
7164 case 2: size = "WORD"; break;
7165 case 4: size = "DWORD"; break;
7166 case 8: size = "QWORD"; break;
7167 case 12: size = "XWORD"; break;
7168 case 16: size = "XMMWORD"; break;
7173 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7176 else if (code == 'w')
7178 else if (code == 'k')
7182 fputs (" PTR ", file);
7186 if (flag_pic && CONSTANT_ADDRESS_P (x))
7187 output_pic_addr_const (file, x, code);
7188 /* Avoid (%rip) for call operands. */
7189 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7190 && GET_CODE (x) != CONST_INT)
7191 output_addr_const (file, x);
7192 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7193 output_operand_lossage ("invalid constraints for operand");
7198 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7203 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7204 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7206 if (ASSEMBLER_DIALECT == ASM_ATT)
7208 fprintf (file, "0x%lx", l);
7211 /* These float cases don't actually occur as immediate operands. */
7212 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7216 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7217 fprintf (file, "%s", dstr);
7220 else if (GET_CODE (x) == CONST_DOUBLE
7221 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7225 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7226 fprintf (file, "%s", dstr);
7233 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7235 if (ASSEMBLER_DIALECT == ASM_ATT)
7238 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7239 || GET_CODE (x) == LABEL_REF)
7241 if (ASSEMBLER_DIALECT == ASM_ATT)
7244 fputs ("OFFSET FLAT:", file);
7247 if (GET_CODE (x) == CONST_INT)
7248 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7250 output_pic_addr_const (file, x, code);
7252 output_addr_const (file, x);
7256 /* Print a memory operand whose address is ADDR. */
7259 print_operand_address (file, addr)
7263 struct ix86_address parts;
7264 rtx base, index, disp;
7267 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7269 if (ASSEMBLER_DIALECT == ASM_INTEL)
7270 fputs ("DWORD PTR ", file);
7271 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7274 fputs ("fs:0", file);
7276 fputs ("gs:0", file);
7280 if (! ix86_decompose_address (addr, &parts))
7284 index = parts.index;
7286 scale = parts.scale;
7288 if (!base && !index)
7290 /* Displacement only requires special attention. */
7292 if (GET_CODE (disp) == CONST_INT)
7294 if (ASSEMBLER_DIALECT == ASM_INTEL)
7296 if (USER_LABEL_PREFIX[0] == 0)
7298 fputs ("ds:", file);
7300 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7303 output_pic_addr_const (file, addr, 0);
7305 output_addr_const (file, addr);
7307 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7309 && ((GET_CODE (addr) == SYMBOL_REF
7310 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7311 || GET_CODE (addr) == LABEL_REF
7312 || (GET_CODE (addr) == CONST
7313 && GET_CODE (XEXP (addr, 0)) == PLUS
7314 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7315 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7316 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7317 fputs ("(%rip)", file);
7321 if (ASSEMBLER_DIALECT == ASM_ATT)
7326 output_pic_addr_const (file, disp, 0);
7327 else if (GET_CODE (disp) == LABEL_REF)
7328 output_asm_label (disp);
7330 output_addr_const (file, disp);
7335 PRINT_REG (base, 0, file);
7339 PRINT_REG (index, 0, file);
7341 fprintf (file, ",%d", scale);
7347 rtx offset = NULL_RTX;
7351 /* Pull out the offset of a symbol; print any symbol itself. */
7352 if (GET_CODE (disp) == CONST
7353 && GET_CODE (XEXP (disp, 0)) == PLUS
7354 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7356 offset = XEXP (XEXP (disp, 0), 1);
7357 disp = gen_rtx_CONST (VOIDmode,
7358 XEXP (XEXP (disp, 0), 0));
7362 output_pic_addr_const (file, disp, 0);
7363 else if (GET_CODE (disp) == LABEL_REF)
7364 output_asm_label (disp);
7365 else if (GET_CODE (disp) == CONST_INT)
7368 output_addr_const (file, disp);
7374 PRINT_REG (base, 0, file);
7377 if (INTVAL (offset) >= 0)
7379 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7383 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7390 PRINT_REG (index, 0, file);
7392 fprintf (file, "*%d", scale);
7400 output_addr_const_extra (file, x)
7406 if (GET_CODE (x) != UNSPEC)
7409 op = XVECEXP (x, 0, 0);
7410 switch (XINT (x, 1))
7412 case UNSPEC_GOTTPOFF:
7413 output_addr_const (file, op);
7414 /* FIXME: This might be @TPOFF in Sun ld. */
7415 fputs ("@GOTTPOFF", file);
7418 output_addr_const (file, op);
7419 fputs ("@TPOFF", file);
7422 output_addr_const (file, op);
7424 fputs ("@TPOFF", file);
7426 fputs ("@NTPOFF", file);
7429 output_addr_const (file, op);
7430 fputs ("@DTPOFF", file);
7432 case UNSPEC_GOTNTPOFF:
7433 output_addr_const (file, op);
7435 fputs ("@GOTTPOFF(%rip)", file);
7437 fputs ("@GOTNTPOFF", file);
7439 case UNSPEC_INDNTPOFF:
7440 output_addr_const (file, op);
7441 fputs ("@INDNTPOFF", file);
7451 /* Split one or more DImode RTL references into pairs of SImode
7452 references. The RTL can be REG, offsettable MEM, integer constant, or
7453 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7454 split and "num" is its length. lo_half and hi_half are output arrays
7455 that parallel "operands". */
7458 split_di (operands, num, lo_half, hi_half)
7461 rtx lo_half[], hi_half[];
7465 rtx op = operands[num];
7467 /* simplify_subreg refuse to split volatile memory addresses,
7468 but we still have to handle it. */
7469 if (GET_CODE (op) == MEM)
7471 lo_half[num] = adjust_address (op, SImode, 0);
7472 hi_half[num] = adjust_address (op, SImode, 4);
7476 lo_half[num] = simplify_gen_subreg (SImode, op,
7477 GET_MODE (op) == VOIDmode
7478 ? DImode : GET_MODE (op), 0);
7479 hi_half[num] = simplify_gen_subreg (SImode, op,
7480 GET_MODE (op) == VOIDmode
7481 ? DImode : GET_MODE (op), 4);
7485 /* Split one or more TImode RTL references into pairs of SImode
7486 references. The RTL can be REG, offsettable MEM, integer constant, or
7487 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7488 split and "num" is its length. lo_half and hi_half are output arrays
7489 that parallel "operands". */
7492 split_ti (operands, num, lo_half, hi_half)
7495 rtx lo_half[], hi_half[];
7499 rtx op = operands[num];
7501 /* simplify_subreg refuse to split volatile memory addresses, but we
7502 still have to handle it. */
7503 if (GET_CODE (op) == MEM)
7505 lo_half[num] = adjust_address (op, DImode, 0);
7506 hi_half[num] = adjust_address (op, DImode, 8);
7510 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7511 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7516 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7517 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7518 is the expression of the binary operation. The output may either be
7519 emitted here, or returned to the caller, like all output_* functions.
7521 There is no guarantee that the operands are the same mode, as they
7522 might be within FLOAT or FLOAT_EXTEND expressions. */
7524 #ifndef SYSV386_COMPAT
7525 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7526 wants to fix the assemblers because that causes incompatibility
7527 with gcc. No-one wants to fix gcc because that causes
7528 incompatibility with assemblers... You can use the option of
7529 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7530 #define SYSV386_COMPAT 1
7534 output_387_binary_op (insn, operands)
7538 static char buf[30];
7541 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7543 #ifdef ENABLE_CHECKING
7544 /* Even if we do not want to check the inputs, this documents input
7545 constraints. Which helps in understanding the following code. */
7546 if (STACK_REG_P (operands[0])
7547 && ((REG_P (operands[1])
7548 && REGNO (operands[0]) == REGNO (operands[1])
7549 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7550 || (REG_P (operands[2])
7551 && REGNO (operands[0]) == REGNO (operands[2])
7552 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7553 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7559 switch (GET_CODE (operands[3]))
7562 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7563 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7571 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7572 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7580 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7581 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7589 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7590 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7604 if (GET_MODE (operands[0]) == SFmode)
7605 strcat (buf, "ss\t{%2, %0|%0, %2}");
7607 strcat (buf, "sd\t{%2, %0|%0, %2}");
7612 switch (GET_CODE (operands[3]))
7616 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7618 rtx temp = operands[2];
7619 operands[2] = operands[1];
7623 /* know operands[0] == operands[1]. */
7625 if (GET_CODE (operands[2]) == MEM)
7631 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7633 if (STACK_TOP_P (operands[0]))
7634 /* How is it that we are storing to a dead operand[2]?
7635 Well, presumably operands[1] is dead too. We can't
7636 store the result to st(0) as st(0) gets popped on this
7637 instruction. Instead store to operands[2] (which I
7638 think has to be st(1)). st(1) will be popped later.
7639 gcc <= 2.8.1 didn't have this check and generated
7640 assembly code that the Unixware assembler rejected. */
7641 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7643 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7647 if (STACK_TOP_P (operands[0]))
7648 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7650 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7655 if (GET_CODE (operands[1]) == MEM)
7661 if (GET_CODE (operands[2]) == MEM)
7667 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7670 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7671 derived assemblers, confusingly reverse the direction of
7672 the operation for fsub{r} and fdiv{r} when the
7673 destination register is not st(0). The Intel assembler
7674 doesn't have this brain damage. Read !SYSV386_COMPAT to
7675 figure out what the hardware really does. */
7676 if (STACK_TOP_P (operands[0]))
7677 p = "{p\t%0, %2|rp\t%2, %0}";
7679 p = "{rp\t%2, %0|p\t%0, %2}";
7681 if (STACK_TOP_P (operands[0]))
7682 /* As above for fmul/fadd, we can't store to st(0). */
7683 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7685 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7690 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7693 if (STACK_TOP_P (operands[0]))
7694 p = "{rp\t%0, %1|p\t%1, %0}";
7696 p = "{p\t%1, %0|rp\t%0, %1}";
7698 if (STACK_TOP_P (operands[0]))
7699 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7701 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7706 if (STACK_TOP_P (operands[0]))
7708 if (STACK_TOP_P (operands[1]))
7709 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7711 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7714 else if (STACK_TOP_P (operands[1]))
7717 p = "{\t%1, %0|r\t%0, %1}";
7719 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7725 p = "{r\t%2, %0|\t%0, %2}";
7727 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7740 /* Output code to initialize control word copies used by
7741 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7742 is set to control word rounding downwards. */
7744 emit_i387_cw_initialization (normal, round_down)
7745 rtx normal, round_down;
7747 rtx reg = gen_reg_rtx (HImode);
7749 emit_insn (gen_x86_fnstcw_1 (normal));
7750 emit_move_insn (reg, normal);
7751 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7753 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7755 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7756 emit_move_insn (round_down, reg);
7759 /* Output code for INSN to convert a float to a signed int. OPERANDS
7760 are the insn operands. The output may be [HSD]Imode and the input
7761 operand may be [SDX]Fmode. */
7764 output_fix_trunc (insn, operands)
7768 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7769 int dimode_p = GET_MODE (operands[0]) == DImode;
7771 /* Jump through a hoop or two for DImode, since the hardware has no
7772 non-popping instruction. We used to do this a different way, but
7773 that was somewhat fragile and broke with post-reload splitters. */
7774 if (dimode_p && !stack_top_dies)
7775 output_asm_insn ("fld\t%y1", operands);
7777 if (!STACK_TOP_P (operands[1]))
7780 if (GET_CODE (operands[0]) != MEM)
7783 output_asm_insn ("fldcw\t%3", operands);
7784 if (stack_top_dies || dimode_p)
7785 output_asm_insn ("fistp%z0\t%0", operands);
7787 output_asm_insn ("fist%z0\t%0", operands);
7788 output_asm_insn ("fldcw\t%2", operands);
7793 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7794 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7795 when fucom should be used. */
7798 output_fp_compare (insn, operands, eflags_p, unordered_p)
7801 int eflags_p, unordered_p;
7804 rtx cmp_op0 = operands[0];
7805 rtx cmp_op1 = operands[1];
7806 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7811 cmp_op1 = operands[2];
7815 if (GET_MODE (operands[0]) == SFmode)
7817 return "ucomiss\t{%1, %0|%0, %1}";
7819 return "comiss\t{%1, %0|%0, %1}";
7822 return "ucomisd\t{%1, %0|%0, %1}";
7824 return "comisd\t{%1, %0|%0, %1}";
7827 if (! STACK_TOP_P (cmp_op0))
7830 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7832 if (STACK_REG_P (cmp_op1)
7834 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7835 && REGNO (cmp_op1) != FIRST_STACK_REG)
7837 /* If both the top of the 387 stack dies, and the other operand
7838 is also a stack register that dies, then this must be a
7839 `fcompp' float compare */
7843 /* There is no double popping fcomi variant. Fortunately,
7844 eflags is immune from the fstp's cc clobbering. */
7846 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7848 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7856 return "fucompp\n\tfnstsw\t%0";
7858 return "fcompp\n\tfnstsw\t%0";
7871 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7873 static const char * const alt[24] =
7885 "fcomi\t{%y1, %0|%0, %y1}",
7886 "fcomip\t{%y1, %0|%0, %y1}",
7887 "fucomi\t{%y1, %0|%0, %y1}",
7888 "fucomip\t{%y1, %0|%0, %y1}",
7895 "fcom%z2\t%y2\n\tfnstsw\t%0",
7896 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7897 "fucom%z2\t%y2\n\tfnstsw\t%0",
7898 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7900 "ficom%z2\t%y2\n\tfnstsw\t%0",
7901 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7909 mask = eflags_p << 3;
7910 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7911 mask |= unordered_p << 1;
7912 mask |= stack_top_dies;
7925 ix86_output_addr_vec_elt (file, value)
7929 const char *directive = ASM_LONG;
7934 directive = ASM_QUAD;
7940 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7944 ix86_output_addr_diff_elt (file, value, rel)
7949 fprintf (file, "%s%s%d-%s%d\n",
7950 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7951 else if (HAVE_AS_GOTOFF_IN_DATA)
7952 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7954 else if (TARGET_MACHO)
7955 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7956 machopic_function_base_name () + 1);
7959 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7960 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7963 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7967 ix86_expand_clear (dest)
7972 /* We play register width games, which are only valid after reload. */
7973 if (!reload_completed)
7976 /* Avoid HImode and its attendant prefix byte. */
7977 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7978 dest = gen_rtx_REG (SImode, REGNO (dest));
7980 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7982 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7983 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7985 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7986 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7992 /* X is an unchanging MEM. If it is a constant pool reference, return
7993 the constant pool rtx, else NULL. */
7996 maybe_get_pool_constant (x)
8001 if (flag_pic && ! TARGET_64BIT)
8003 if (GET_CODE (x) != PLUS)
8005 if (XEXP (x, 0) != pic_offset_table_rtx)
8008 if (GET_CODE (x) != CONST)
8011 if (GET_CODE (x) != UNSPEC)
8013 if (XINT (x, 1) != UNSPEC_GOTOFF)
8015 x = XVECEXP (x, 0, 0);
8018 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8019 return get_pool_constant (x);
8025 ix86_expand_move (mode, operands)
8026 enum machine_mode mode;
8029 int strict = (reload_in_progress || reload_completed);
8030 rtx insn, op0, op1, tmp;
8035 if (tls_symbolic_operand (op1, Pmode))
8037 op1 = legitimize_address (op1, op1, VOIDmode);
8038 if (GET_CODE (op0) == MEM)
8040 tmp = gen_reg_rtx (mode);
8041 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8045 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8050 rtx temp = ((reload_in_progress
8051 || ((op0 && GET_CODE (op0) == REG)
8053 ? op0 : gen_reg_rtx (Pmode));
8054 op1 = machopic_indirect_data_reference (op1, temp);
8055 op1 = machopic_legitimize_pic_address (op1, mode,
8056 temp == op1 ? 0 : temp);
8060 if (MACHOPIC_INDIRECT)
8061 op1 = machopic_indirect_data_reference (op1, 0);
8065 insn = gen_rtx_SET (VOIDmode, op0, op1);
8069 #endif /* TARGET_MACHO */
8070 if (GET_CODE (op0) == MEM)
8071 op1 = force_reg (Pmode, op1);
8075 if (GET_CODE (temp) != REG)
8076 temp = gen_reg_rtx (Pmode);
8077 temp = legitimize_pic_address (op1, temp);
8085 if (GET_CODE (op0) == MEM
8086 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8087 || !push_operand (op0, mode))
8088 && GET_CODE (op1) == MEM)
8089 op1 = force_reg (mode, op1);
8091 if (push_operand (op0, mode)
8092 && ! general_no_elim_operand (op1, mode))
8093 op1 = copy_to_mode_reg (mode, op1);
8095 /* Force large constants in 64bit compilation into register
8096 to get them CSEed. */
8097 if (TARGET_64BIT && mode == DImode
8098 && immediate_operand (op1, mode)
8099 && !x86_64_zero_extended_value (op1)
8100 && !register_operand (op0, mode)
8101 && optimize && !reload_completed && !reload_in_progress)
8102 op1 = copy_to_mode_reg (mode, op1);
8104 if (FLOAT_MODE_P (mode))
8106 /* If we are loading a floating point constant to a register,
8107 force the value to memory now, since we'll get better code
8108 out the back end. */
8112 else if (GET_CODE (op1) == CONST_DOUBLE
8113 && register_operand (op0, mode))
8114 op1 = validize_mem (force_const_mem (mode, op1));
8118 insn = gen_rtx_SET (VOIDmode, op0, op1);
8124 ix86_expand_vector_move (mode, operands)
8125 enum machine_mode mode;
8128 /* Force constants other than zero into memory. We do not know how
8129 the instructions used to build constants modify the upper 64 bits
8130 of the register, once we have that information we may be able
8131 to handle some of them more efficiently. */
8132 if ((reload_in_progress | reload_completed) == 0
8133 && register_operand (operands[0], mode)
8134 && CONSTANT_P (operands[1]))
8135 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8137 /* Make operand1 a register if it isn't already. */
8139 && !register_operand (operands[0], mode)
8140 && !register_operand (operands[1], mode))
8142 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8143 emit_move_insn (operands[0], temp);
8147 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8150 /* Attempt to expand a binary operator. Make the expansion closer to the
8151 actual machine, then just general_operand, which will allow 3 separate
8152 memory references (one output, two input) in a single insn. */
8155 ix86_expand_binary_operator (code, mode, operands)
8157 enum machine_mode mode;
8160 int matching_memory;
8161 rtx src1, src2, dst, op, clob;
8167 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8168 if (GET_RTX_CLASS (code) == 'c'
8169 && (rtx_equal_p (dst, src2)
8170 || immediate_operand (src1, mode)))
8177 /* If the destination is memory, and we do not have matching source
8178 operands, do things in registers. */
8179 matching_memory = 0;
8180 if (GET_CODE (dst) == MEM)
8182 if (rtx_equal_p (dst, src1))
8183 matching_memory = 1;
8184 else if (GET_RTX_CLASS (code) == 'c'
8185 && rtx_equal_p (dst, src2))
8186 matching_memory = 2;
8188 dst = gen_reg_rtx (mode);
8191 /* Both source operands cannot be in memory. */
8192 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8194 if (matching_memory != 2)
8195 src2 = force_reg (mode, src2);
8197 src1 = force_reg (mode, src1);
8200 /* If the operation is not commutable, source 1 cannot be a constant
8201 or non-matching memory. */
8202 if ((CONSTANT_P (src1)
8203 || (!matching_memory && GET_CODE (src1) == MEM))
8204 && GET_RTX_CLASS (code) != 'c')
8205 src1 = force_reg (mode, src1);
8207 /* If optimizing, copy to regs to improve CSE */
8208 if (optimize && ! no_new_pseudos)
8210 if (GET_CODE (dst) == MEM)
8211 dst = gen_reg_rtx (mode);
8212 if (GET_CODE (src1) == MEM)
8213 src1 = force_reg (mode, src1);
8214 if (GET_CODE (src2) == MEM)
8215 src2 = force_reg (mode, src2);
8218 /* Emit the instruction. */
8220 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8221 if (reload_in_progress)
8223 /* Reload doesn't know about the flags register, and doesn't know that
8224 it doesn't want to clobber it. We can only do this with PLUS. */
8231 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8232 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8235 /* Fix up the destination if needed. */
8236 if (dst != operands[0])
8237 emit_move_insn (operands[0], dst);
8240 /* Return TRUE or FALSE depending on whether the binary operator meets the
8241 appropriate constraints. */
8244 ix86_binary_operator_ok (code, mode, operands)
8246 enum machine_mode mode ATTRIBUTE_UNUSED;
8249 /* Both source operands cannot be in memory. */
8250 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8252 /* If the operation is not commutable, source 1 cannot be a constant. */
8253 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8255 /* If the destination is memory, we must have a matching source operand. */
8256 if (GET_CODE (operands[0]) == MEM
8257 && ! (rtx_equal_p (operands[0], operands[1])
8258 || (GET_RTX_CLASS (code) == 'c'
8259 && rtx_equal_p (operands[0], operands[2]))))
8261 /* If the operation is not commutable and the source 1 is memory, we must
8262 have a matching destination. */
8263 if (GET_CODE (operands[1]) == MEM
8264 && GET_RTX_CLASS (code) != 'c'
8265 && ! rtx_equal_p (operands[0], operands[1]))
8270 /* Attempt to expand a unary operator. Make the expansion closer to the
8271 actual machine, then just general_operand, which will allow 2 separate
8272 memory references (one output, one input) in a single insn. */
8275 ix86_expand_unary_operator (code, mode, operands)
8277 enum machine_mode mode;
8280 int matching_memory;
8281 rtx src, dst, op, clob;
8286 /* If the destination is memory, and we do not have matching source
8287 operands, do things in registers. */
8288 matching_memory = 0;
8289 if (GET_CODE (dst) == MEM)
8291 if (rtx_equal_p (dst, src))
8292 matching_memory = 1;
8294 dst = gen_reg_rtx (mode);
8297 /* When source operand is memory, destination must match. */
8298 if (!matching_memory && GET_CODE (src) == MEM)
8299 src = force_reg (mode, src);
8301 /* If optimizing, copy to regs to improve CSE */
8302 if (optimize && ! no_new_pseudos)
8304 if (GET_CODE (dst) == MEM)
8305 dst = gen_reg_rtx (mode);
8306 if (GET_CODE (src) == MEM)
8307 src = force_reg (mode, src);
8310 /* Emit the instruction. */
8312 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8313 if (reload_in_progress || code == NOT)
8315 /* Reload doesn't know about the flags register, and doesn't know that
8316 it doesn't want to clobber it. */
8323 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8324 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8327 /* Fix up the destination if needed. */
8328 if (dst != operands[0])
8329 emit_move_insn (operands[0], dst);
8332 /* Return TRUE or FALSE depending on whether the unary operator meets the
8333 appropriate constraints. */
8336 ix86_unary_operator_ok (code, mode, operands)
8337 enum rtx_code code ATTRIBUTE_UNUSED;
8338 enum machine_mode mode ATTRIBUTE_UNUSED;
8339 rtx operands[2] ATTRIBUTE_UNUSED;
8341 /* If one of operands is memory, source and destination must match. */
8342 if ((GET_CODE (operands[0]) == MEM
8343 || GET_CODE (operands[1]) == MEM)
8344 && ! rtx_equal_p (operands[0], operands[1]))
8349 /* Return TRUE or FALSE depending on whether the first SET in INSN
8350 has source and destination with matching CC modes, and that the
8351 CC mode is at least as constrained as REQ_MODE. */
8354 ix86_match_ccmode (insn, req_mode)
8356 enum machine_mode req_mode;
8359 enum machine_mode set_mode;
8361 set = PATTERN (insn);
8362 if (GET_CODE (set) == PARALLEL)
8363 set = XVECEXP (set, 0, 0);
8364 if (GET_CODE (set) != SET)
8366 if (GET_CODE (SET_SRC (set)) != COMPARE)
8369 set_mode = GET_MODE (SET_DEST (set));
8373 if (req_mode != CCNOmode
8374 && (req_mode != CCmode
8375 || XEXP (SET_SRC (set), 1) != const0_rtx))
8379 if (req_mode == CCGCmode)
8383 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8387 if (req_mode == CCZmode)
8397 return (GET_MODE (SET_SRC (set)) == set_mode);
8400 /* Generate insn patterns to do an integer compare of OPERANDS. */
8403 ix86_expand_int_compare (code, op0, op1)
8407 enum machine_mode cmpmode;
8410 cmpmode = SELECT_CC_MODE (code, op0, op1);
8411 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8413 /* This is very simple, but making the interface the same as in the
8414 FP case makes the rest of the code easier. */
8415 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8416 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8418 /* Return the test that should be put into the flags user, i.e.
8419 the bcc, scc, or cmov instruction. */
8420 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8423 /* Figure out whether to use ordered or unordered fp comparisons.
8424 Return the appropriate mode to use. */
8427 ix86_fp_compare_mode (code)
8428 enum rtx_code code ATTRIBUTE_UNUSED;
8430 /* ??? In order to make all comparisons reversible, we do all comparisons
8431 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8432 all forms trapping and nontrapping comparisons, we can make inequality
8433 comparisons trapping again, since it results in better code when using
8434 FCOM based compares. */
8435 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8439 ix86_cc_mode (code, op0, op1)
8443 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8444 return ix86_fp_compare_mode (code);
8447 /* Only zero flag is needed. */
8449 case NE: /* ZF!=0 */
8451 /* Codes needing carry flag. */
8452 case GEU: /* CF=0 */
8453 case GTU: /* CF=0 & ZF=0 */
8454 case LTU: /* CF=1 */
8455 case LEU: /* CF=1 | ZF=1 */
8457 /* Codes possibly doable only with sign flag when
8458 comparing against zero. */
8459 case GE: /* SF=OF or SF=0 */
8460 case LT: /* SF<>OF or SF=1 */
8461 if (op1 == const0_rtx)
8464 /* For other cases Carry flag is not required. */
8466 /* Codes doable only with sign flag when comparing
8467 against zero, but we miss jump instruction for it
8468 so we need to use relational tests agains overflow
8469 that thus needs to be zero. */
8470 case GT: /* ZF=0 & SF=OF */
8471 case LE: /* ZF=1 | SF<>OF */
8472 if (op1 == const0_rtx)
8476 /* strcmp pattern do (use flags) and combine may ask us for proper
8485 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8488 ix86_use_fcomi_compare (code)
8489 enum rtx_code code ATTRIBUTE_UNUSED;
8491 enum rtx_code swapped_code = swap_condition (code);
8492 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8493 || (ix86_fp_comparison_cost (swapped_code)
8494 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8497 /* Swap, force into registers, or otherwise massage the two operands
8498 to a fp comparison. The operands are updated in place; the new
8499 comparison code is returned. */
8501 static enum rtx_code
8502 ix86_prepare_fp_compare_args (code, pop0, pop1)
8506 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8507 rtx op0 = *pop0, op1 = *pop1;
8508 enum machine_mode op_mode = GET_MODE (op0);
8509 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8511 /* All of the unordered compare instructions only work on registers.
8512 The same is true of the XFmode compare instructions. The same is
8513 true of the fcomi compare instructions. */
8516 && (fpcmp_mode == CCFPUmode
8517 || op_mode == XFmode
8518 || op_mode == TFmode
8519 || ix86_use_fcomi_compare (code)))
8521 op0 = force_reg (op_mode, op0);
8522 op1 = force_reg (op_mode, op1);
8526 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8527 things around if they appear profitable, otherwise force op0
8530 if (standard_80387_constant_p (op0) == 0
8531 || (GET_CODE (op0) == MEM
8532 && ! (standard_80387_constant_p (op1) == 0
8533 || GET_CODE (op1) == MEM)))
8536 tmp = op0, op0 = op1, op1 = tmp;
8537 code = swap_condition (code);
8540 if (GET_CODE (op0) != REG)
8541 op0 = force_reg (op_mode, op0);
8543 if (CONSTANT_P (op1))
8545 if (standard_80387_constant_p (op1))
8546 op1 = force_reg (op_mode, op1);
8548 op1 = validize_mem (force_const_mem (op_mode, op1));
8552 /* Try to rearrange the comparison to make it cheaper. */
8553 if (ix86_fp_comparison_cost (code)
8554 > ix86_fp_comparison_cost (swap_condition (code))
8555 && (GET_CODE (op1) == REG || !no_new_pseudos))
8558 tmp = op0, op0 = op1, op1 = tmp;
8559 code = swap_condition (code);
8560 if (GET_CODE (op0) != REG)
8561 op0 = force_reg (op_mode, op0);
8569 /* Convert comparison codes we use to represent FP comparison to integer
8570 code that will result in proper branch. Return UNKNOWN if no such code
8572 static enum rtx_code
8573 ix86_fp_compare_code_to_integer (code)
8603 /* Split comparison code CODE into comparisons we can do using branch
8604 instructions. BYPASS_CODE is comparison code for branch that will
8605 branch around FIRST_CODE and SECOND_CODE. If some of branches
8606 is not required, set value to NIL.
8607 We never require more than two branches. */
8609 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8610 enum rtx_code code, *bypass_code, *first_code, *second_code;
8616 /* The fcomi comparison sets flags as follows:
8626 case GT: /* GTU - CF=0 & ZF=0 */
8627 case GE: /* GEU - CF=0 */
8628 case ORDERED: /* PF=0 */
8629 case UNORDERED: /* PF=1 */
8630 case UNEQ: /* EQ - ZF=1 */
8631 case UNLT: /* LTU - CF=1 */
8632 case UNLE: /* LEU - CF=1 | ZF=1 */
8633 case LTGT: /* EQ - ZF=0 */
8635 case LT: /* LTU - CF=1 - fails on unordered */
8637 *bypass_code = UNORDERED;
8639 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8641 *bypass_code = UNORDERED;
8643 case EQ: /* EQ - ZF=1 - fails on unordered */
8645 *bypass_code = UNORDERED;
8647 case NE: /* NE - ZF=0 - fails on unordered */
8649 *second_code = UNORDERED;
8651 case UNGE: /* GEU - CF=0 - fails on unordered */
8653 *second_code = UNORDERED;
8655 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8657 *second_code = UNORDERED;
8662 if (!TARGET_IEEE_FP)
8669 /* Return cost of comparison done fcom + arithmetics operations on AX.
8670 All following functions do use number of instructions as a cost metrics.
8671 In future this should be tweaked to compute bytes for optimize_size and
8672 take into account performance of various instructions on various CPUs. */
8674 ix86_fp_comparison_arithmetics_cost (code)
8677 if (!TARGET_IEEE_FP)
8679 /* The cost of code output by ix86_expand_fp_compare. */
8707 /* Return cost of comparison done using fcomi operation.
8708 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8710 ix86_fp_comparison_fcomi_cost (code)
8713 enum rtx_code bypass_code, first_code, second_code;
8714 /* Return arbitrarily high cost when instruction is not supported - this
8715 prevents gcc from using it. */
8718 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8719 return (bypass_code != NIL || second_code != NIL) + 2;
8722 /* Return cost of comparison done using sahf operation.
8723 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8725 ix86_fp_comparison_sahf_cost (code)
8728 enum rtx_code bypass_code, first_code, second_code;
8729 /* Return arbitrarily high cost when instruction is not preferred - this
8730 avoids gcc from using it. */
8731 if (!TARGET_USE_SAHF && !optimize_size)
8733 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8734 return (bypass_code != NIL || second_code != NIL) + 3;
8737 /* Compute cost of the comparison done using any method.
8738 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8740 ix86_fp_comparison_cost (code)
8743 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8746 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8747 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8749 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8750 if (min > sahf_cost)
8752 if (min > fcomi_cost)
8757 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8760 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8762 rtx op0, op1, scratch;
8766 enum machine_mode fpcmp_mode, intcmp_mode;
8768 int cost = ix86_fp_comparison_cost (code);
8769 enum rtx_code bypass_code, first_code, second_code;
8771 fpcmp_mode = ix86_fp_compare_mode (code);
8772 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8775 *second_test = NULL_RTX;
8777 *bypass_test = NULL_RTX;
8779 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8781 /* Do fcomi/sahf based test when profitable. */
8782 if ((bypass_code == NIL || bypass_test)
8783 && (second_code == NIL || second_test)
8784 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8788 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8789 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8795 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8796 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8798 scratch = gen_reg_rtx (HImode);
8799 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8800 emit_insn (gen_x86_sahf_1 (scratch));
8803 /* The FP codes work out to act like unsigned. */
8804 intcmp_mode = fpcmp_mode;
8806 if (bypass_code != NIL)
8807 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8808 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8810 if (second_code != NIL)
8811 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8812 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8817 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8818 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8819 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8821 scratch = gen_reg_rtx (HImode);
8822 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8824 /* In the unordered case, we have to check C2 for NaN's, which
8825 doesn't happen to work out to anything nice combination-wise.
8826 So do some bit twiddling on the value we've got in AH to come
8827 up with an appropriate set of condition codes. */
8829 intcmp_mode = CCNOmode;
8834 if (code == GT || !TARGET_IEEE_FP)
8836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8841 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8842 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8843 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8844 intcmp_mode = CCmode;
8850 if (code == LT && TARGET_IEEE_FP)
8852 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8853 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8854 intcmp_mode = CCmode;
8859 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8865 if (code == GE || !TARGET_IEEE_FP)
8867 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8872 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8873 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8880 if (code == LE && TARGET_IEEE_FP)
8882 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8883 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8884 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8885 intcmp_mode = CCmode;
8890 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8896 if (code == EQ && TARGET_IEEE_FP)
8898 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8899 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8900 intcmp_mode = CCmode;
8905 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8912 if (code == NE && TARGET_IEEE_FP)
8914 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8915 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8921 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8927 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8931 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8940 /* Return the test that should be put into the flags user, i.e.
8941 the bcc, scc, or cmov instruction. */
8942 return gen_rtx_fmt_ee (code, VOIDmode,
8943 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8948 ix86_expand_compare (code, second_test, bypass_test)
8950 rtx *second_test, *bypass_test;
8953 op0 = ix86_compare_op0;
8954 op1 = ix86_compare_op1;
8957 *second_test = NULL_RTX;
8959 *bypass_test = NULL_RTX;
8961 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8962 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8963 second_test, bypass_test);
8965 ret = ix86_expand_int_compare (code, op0, op1);
8970 /* Return true if the CODE will result in nontrivial jump sequence. */
8972 ix86_fp_jump_nontrivial_p (code)
8975 enum rtx_code bypass_code, first_code, second_code;
8978 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8979 return bypass_code != NIL || second_code != NIL;
8983 ix86_expand_branch (code, label)
8989 switch (GET_MODE (ix86_compare_op0))
8995 tmp = ix86_expand_compare (code, NULL, NULL);
8996 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8997 gen_rtx_LABEL_REF (VOIDmode, label),
8999 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9009 enum rtx_code bypass_code, first_code, second_code;
9011 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9014 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9016 /* Check whether we will use the natural sequence with one jump. If
9017 so, we can expand jump early. Otherwise delay expansion by
9018 creating compound insn to not confuse optimizers. */
9019 if (bypass_code == NIL && second_code == NIL
9022 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9023 gen_rtx_LABEL_REF (VOIDmode, label),
9028 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9029 ix86_compare_op0, ix86_compare_op1);
9030 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9031 gen_rtx_LABEL_REF (VOIDmode, label),
9033 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9035 use_fcomi = ix86_use_fcomi_compare (code);
9036 vec = rtvec_alloc (3 + !use_fcomi);
9037 RTVEC_ELT (vec, 0) = tmp;
9039 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9041 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9044 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9046 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9054 /* Expand DImode branch into multiple compare+branch. */
9056 rtx lo[2], hi[2], label2;
9057 enum rtx_code code1, code2, code3;
9059 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9061 tmp = ix86_compare_op0;
9062 ix86_compare_op0 = ix86_compare_op1;
9063 ix86_compare_op1 = tmp;
9064 code = swap_condition (code);
9066 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9067 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9069 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9070 avoid two branches. This costs one extra insn, so disable when
9071 optimizing for size. */
9073 if ((code == EQ || code == NE)
9075 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9080 if (hi[1] != const0_rtx)
9081 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9082 NULL_RTX, 0, OPTAB_WIDEN);
9085 if (lo[1] != const0_rtx)
9086 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9087 NULL_RTX, 0, OPTAB_WIDEN);
9089 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9090 NULL_RTX, 0, OPTAB_WIDEN);
9092 ix86_compare_op0 = tmp;
9093 ix86_compare_op1 = const0_rtx;
9094 ix86_expand_branch (code, label);
9098 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9099 op1 is a constant and the low word is zero, then we can just
9100 examine the high word. */
9102 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9105 case LT: case LTU: case GE: case GEU:
9106 ix86_compare_op0 = hi[0];
9107 ix86_compare_op1 = hi[1];
9108 ix86_expand_branch (code, label);
9114 /* Otherwise, we need two or three jumps. */
9116 label2 = gen_label_rtx ();
9119 code2 = swap_condition (code);
9120 code3 = unsigned_condition (code);
9124 case LT: case GT: case LTU: case GTU:
9127 case LE: code1 = LT; code2 = GT; break;
9128 case GE: code1 = GT; code2 = LT; break;
9129 case LEU: code1 = LTU; code2 = GTU; break;
9130 case GEU: code1 = GTU; code2 = LTU; break;
9132 case EQ: code1 = NIL; code2 = NE; break;
9133 case NE: code2 = NIL; break;
9141 * if (hi(a) < hi(b)) goto true;
9142 * if (hi(a) > hi(b)) goto false;
9143 * if (lo(a) < lo(b)) goto true;
9147 ix86_compare_op0 = hi[0];
9148 ix86_compare_op1 = hi[1];
9151 ix86_expand_branch (code1, label);
9153 ix86_expand_branch (code2, label2);
9155 ix86_compare_op0 = lo[0];
9156 ix86_compare_op1 = lo[1];
9157 ix86_expand_branch (code3, label);
9160 emit_label (label2);
9169 /* Split branch based on floating point condition. */
9171 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9173 rtx op1, op2, target1, target2, tmp;
9176 rtx label = NULL_RTX;
9178 int bypass_probability = -1, second_probability = -1, probability = -1;
9181 if (target2 != pc_rtx)
9184 code = reverse_condition_maybe_unordered (code);
9189 condition = ix86_expand_fp_compare (code, op1, op2,
9190 tmp, &second, &bypass);
9192 if (split_branch_probability >= 0)
9194 /* Distribute the probabilities across the jumps.
9195 Assume the BYPASS and SECOND to be always test
9197 probability = split_branch_probability;
9199 /* Value of 1 is low enough to make no need for probability
9200 to be updated. Later we may run some experiments and see
9201 if unordered values are more frequent in practice. */
9203 bypass_probability = 1;
9205 second_probability = 1;
9207 if (bypass != NULL_RTX)
9209 label = gen_label_rtx ();
9210 i = emit_jump_insn (gen_rtx_SET
9212 gen_rtx_IF_THEN_ELSE (VOIDmode,
9214 gen_rtx_LABEL_REF (VOIDmode,
9217 if (bypass_probability >= 0)
9219 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9220 GEN_INT (bypass_probability),
9223 i = emit_jump_insn (gen_rtx_SET
9225 gen_rtx_IF_THEN_ELSE (VOIDmode,
9226 condition, target1, target2)));
9227 if (probability >= 0)
9229 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9230 GEN_INT (probability),
9232 if (second != NULL_RTX)
9234 i = emit_jump_insn (gen_rtx_SET
9236 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9238 if (second_probability >= 0)
9240 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9241 GEN_INT (second_probability),
9244 if (label != NULL_RTX)
9249 ix86_expand_setcc (code, dest)
9253 rtx ret, tmp, tmpreg;
9254 rtx second_test, bypass_test;
9256 if (GET_MODE (ix86_compare_op0) == DImode
9258 return 0; /* FAIL */
9260 if (GET_MODE (dest) != QImode)
9263 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9264 PUT_MODE (ret, QImode);
9269 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9270 if (bypass_test || second_test)
9272 rtx test = second_test;
9274 rtx tmp2 = gen_reg_rtx (QImode);
9281 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9283 PUT_MODE (test, QImode);
9284 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9287 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9289 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9292 return 1; /* DONE */
9295 /* Expand comparison setting or clearing carry flag. Return true when successful
9296 and set pop for the operation. */
9298 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9302 enum machine_mode mode =
9303 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9305 /* Do not handle DImode compares that go trought special path. Also we can't
9306 deal with FP compares yet. This is possible to add. */
9307 if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
9315 /* Convert a==0 into (unsigned)a<1. */
9318 if (op1 != const0_rtx)
9321 code = (code == EQ ? LTU : GEU);
9324 /* Convert a>b into b<a or a>=b-1. */
9327 if (GET_CODE (op1) == CONST_INT)
9329 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9330 /* Bail out on overflow. We still can swap operands but that
9331 would force loading of the constant into register. */
9332 if (op1 == const0_rtx
9333 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9335 code = (code == GTU ? GEU : LTU);
9342 code = (code == GTU ? LTU : GEU);
9346 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9349 if (mode == DImode || op1 != const0_rtx)
9351 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9352 code = (code == LT ? GEU : LTU);
9356 if (mode == DImode || op1 != constm1_rtx)
9358 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9359 code = (code == LE ? GEU : LTU);
9365 ix86_compare_op0 = op0;
9366 ix86_compare_op1 = op1;
9367 *pop = ix86_expand_compare (code, NULL, NULL);
9368 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9374 ix86_expand_int_movcc (operands)
9377 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9378 rtx compare_seq, compare_op;
9379 rtx second_test, bypass_test;
9380 enum machine_mode mode = GET_MODE (operands[0]);
9381 bool sign_bit_compare_p = false;;
9384 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9385 compare_seq = get_insns ();
9388 compare_code = GET_CODE (compare_op);
9390 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9391 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9392 sign_bit_compare_p = true;
9394 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9395 HImode insns, we'd be swallowed in word prefix ops. */
9397 if ((mode != HImode || TARGET_FAST_PREFIX)
9398 && (mode != DImode || TARGET_64BIT)
9399 && GET_CODE (operands[2]) == CONST_INT
9400 && GET_CODE (operands[3]) == CONST_INT)
9402 rtx out = operands[0];
9403 HOST_WIDE_INT ct = INTVAL (operands[2]);
9404 HOST_WIDE_INT cf = INTVAL (operands[3]);
9408 /* Sign bit compares are better done using shifts than we do by using
9410 if (sign_bit_compare_p
9411 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9412 ix86_compare_op1, &compare_op))
9414 /* Detect overlap between destination and compare sources. */
9417 if (!sign_bit_compare_p)
9419 compare_code = GET_CODE (compare_op);
9421 /* To simplify rest of code, restrict to the GEU case. */
9422 if (compare_code == LTU)
9424 HOST_WIDE_INT tmp = ct;
9427 compare_code = reverse_condition (compare_code);
9428 code = reverse_condition (code);
9432 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9433 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9434 tmp = gen_reg_rtx (mode);
9437 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9439 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
9443 if (code == GT || code == GE)
9444 code = reverse_condition (code);
9447 HOST_WIDE_INT tmp = ct;
9451 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9452 ix86_compare_op1, VOIDmode, 0, -1);
9465 tmp = expand_simple_binop (mode, PLUS,
9467 copy_rtx (tmp), 1, OPTAB_DIRECT);
9478 tmp = expand_simple_binop (mode, IOR,
9480 copy_rtx (tmp), 1, OPTAB_DIRECT);
9482 else if (diff == -1 && ct)
9492 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9494 tmp = expand_simple_binop (mode, PLUS,
9495 copy_rtx (tmp), GEN_INT (cf),
9496 copy_rtx (tmp), 1, OPTAB_DIRECT);
9504 * andl cf - ct, dest
9514 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9517 tmp = expand_simple_binop (mode, AND,
9519 gen_int_mode (cf - ct, mode),
9520 copy_rtx (tmp), 1, OPTAB_DIRECT);
9522 tmp = expand_simple_binop (mode, PLUS,
9523 copy_rtx (tmp), GEN_INT (ct),
9524 copy_rtx (tmp), 1, OPTAB_DIRECT);
9527 if (!rtx_equal_p (tmp, out))
9528 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9530 return 1; /* DONE */
9536 tmp = ct, ct = cf, cf = tmp;
9538 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9540 /* We may be reversing unordered compare to normal compare, that
9541 is not valid in general (we may convert non-trapping condition
9542 to trapping one), however on i386 we currently emit all
9543 comparisons unordered. */
9544 compare_code = reverse_condition_maybe_unordered (compare_code);
9545 code = reverse_condition_maybe_unordered (code);
9549 compare_code = reverse_condition (compare_code);
9550 code = reverse_condition (code);
9555 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9556 && GET_CODE (ix86_compare_op1) == CONST_INT)
9558 if (ix86_compare_op1 == const0_rtx
9559 && (code == LT || code == GE))
9560 compare_code = code;
9561 else if (ix86_compare_op1 == constm1_rtx)
9565 else if (code == GT)
9570 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9571 if (compare_code != NIL
9572 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9573 && (cf == -1 || ct == -1))
9575 /* If lea code below could be used, only optimize
9576 if it results in a 2 insn sequence. */
9578 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9579 || diff == 3 || diff == 5 || diff == 9)
9580 || (compare_code == LT && ct == -1)
9581 || (compare_code == GE && cf == -1))
9584 * notl op1 (if necessary)
9592 code = reverse_condition (code);
9595 out = emit_store_flag (out, code, ix86_compare_op0,
9596 ix86_compare_op1, VOIDmode, 0, -1);
9598 out = expand_simple_binop (mode, IOR,
9600 out, 1, OPTAB_DIRECT);
9601 if (out != operands[0])
9602 emit_move_insn (operands[0], out);
9604 return 1; /* DONE */
9609 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9610 || diff == 3 || diff == 5 || diff == 9)
9611 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9612 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9618 * lea cf(dest*(ct-cf)),dest
9622 * This also catches the degenerate setcc-only case.
9628 out = emit_store_flag (out, code, ix86_compare_op0,
9629 ix86_compare_op1, VOIDmode, 0, 1);
9632 /* On x86_64 the lea instruction operates on Pmode, so we need
9633 to get arithmetics done in proper mode to match. */
9635 tmp = copy_rtx (out);
9639 out1 = copy_rtx (out);
9640 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9644 tmp = gen_rtx_PLUS (mode, tmp, out1);
9650 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9653 if (!rtx_equal_p (tmp, out))
9656 out = force_operand (tmp, copy_rtx (out));
9658 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9660 if (!rtx_equal_p (out, operands[0]))
9661 emit_move_insn (operands[0], copy_rtx (out));
9663 return 1; /* DONE */
9667 * General case: Jumpful:
9668 * xorl dest,dest cmpl op1, op2
9669 * cmpl op1, op2 movl ct, dest
9671 * decl dest movl cf, dest
9672 * andl (cf-ct),dest 1:
9677 * This is reasonably steep, but branch mispredict costs are
9678 * high on modern cpus, so consider failing only if optimizing
9682 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9683 && BRANCH_COST >= 2)
9689 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9690 /* We may be reversing unordered compare to normal compare,
9691 that is not valid in general (we may convert non-trapping
9692 condition to trapping one), however on i386 we currently
9693 emit all comparisons unordered. */
9694 code = reverse_condition_maybe_unordered (code);
9697 code = reverse_condition (code);
9698 if (compare_code != NIL)
9699 compare_code = reverse_condition (compare_code);
9703 if (compare_code != NIL)
9705 /* notl op1 (if needed)
9710 For x < 0 (resp. x <= -1) there will be no notl,
9711 so if possible swap the constants to get rid of the
9713 True/false will be -1/0 while code below (store flag
9714 followed by decrement) is 0/-1, so the constants need
9715 to be exchanged once more. */
9717 if (compare_code == GE || !cf)
9719 code = reverse_condition (code);
9724 HOST_WIDE_INT tmp = cf;
9729 out = emit_store_flag (out, code, ix86_compare_op0,
9730 ix86_compare_op1, VOIDmode, 0, -1);
9734 out = emit_store_flag (out, code, ix86_compare_op0,
9735 ix86_compare_op1, VOIDmode, 0, 1);
9737 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9738 copy_rtx (out), 1, OPTAB_DIRECT);
9741 out = expand_simple_binop (mode, AND, copy_rtx (out),
9742 gen_int_mode (cf - ct, mode),
9743 copy_rtx (out), 1, OPTAB_DIRECT);
9745 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9746 copy_rtx (out), 1, OPTAB_DIRECT);
9747 if (!rtx_equal_p (out, operands[0]))
9748 emit_move_insn (operands[0], copy_rtx (out));
9750 return 1; /* DONE */
9754 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9756 /* Try a few things more with specific constants and a variable. */
9759 rtx var, orig_out, out, tmp;
9761 if (BRANCH_COST <= 2)
9762 return 0; /* FAIL */
9764 /* If one of the two operands is an interesting constant, load a
9765 constant with the above and mask it in with a logical operation. */
9767 if (GET_CODE (operands[2]) == CONST_INT)
9770 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9771 operands[3] = constm1_rtx, op = and_optab;
9772 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9773 operands[3] = const0_rtx, op = ior_optab;
9775 return 0; /* FAIL */
9777 else if (GET_CODE (operands[3]) == CONST_INT)
9780 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9781 operands[2] = constm1_rtx, op = and_optab;
9782 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9783 operands[2] = const0_rtx, op = ior_optab;
9785 return 0; /* FAIL */
9788 return 0; /* FAIL */
9790 orig_out = operands[0];
9791 tmp = gen_reg_rtx (mode);
9794 /* Recurse to get the constant loaded. */
9795 if (ix86_expand_int_movcc (operands) == 0)
9796 return 0; /* FAIL */
9798 /* Mask in the interesting variable. */
9799 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9801 if (!rtx_equal_p (out, orig_out))
9802 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9804 return 1; /* DONE */
9808 * For comparison with above,
9818 if (! nonimmediate_operand (operands[2], mode))
9819 operands[2] = force_reg (mode, operands[2]);
9820 if (! nonimmediate_operand (operands[3], mode))
9821 operands[3] = force_reg (mode, operands[3]);
9823 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9825 rtx tmp = gen_reg_rtx (mode);
9826 emit_move_insn (tmp, operands[3]);
9829 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9831 rtx tmp = gen_reg_rtx (mode);
9832 emit_move_insn (tmp, operands[2]);
9836 if (! register_operand (operands[2], VOIDmode)
9838 || ! register_operand (operands[3], VOIDmode)))
9839 operands[2] = force_reg (mode, operands[2]);
9842 && ! register_operand (operands[3], VOIDmode))
9843 operands[3] = force_reg (mode, operands[3]);
9845 emit_insn (compare_seq);
9846 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9847 gen_rtx_IF_THEN_ELSE (mode,
9848 compare_op, operands[2],
9851 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9852 gen_rtx_IF_THEN_ELSE (mode,
9854 copy_rtx (operands[3]),
9855 copy_rtx (operands[0]))));
9857 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9858 gen_rtx_IF_THEN_ELSE (mode,
9860 copy_rtx (operands[2]),
9861 copy_rtx (operands[0]))));
9863 return 1; /* DONE */
9867 ix86_expand_fp_movcc (operands)
9872 rtx compare_op, second_test, bypass_test;
9874 /* For SF/DFmode conditional moves based on comparisons
9875 in same mode, we may want to use SSE min/max instructions. */
9876 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9877 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9878 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9879 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9881 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9882 /* We may be called from the post-reload splitter. */
9883 && (!REG_P (operands[0])
9884 || SSE_REG_P (operands[0])
9885 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9887 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9888 code = GET_CODE (operands[1]);
9890 /* See if we have (cross) match between comparison operands and
9891 conditional move operands. */
9892 if (rtx_equal_p (operands[2], op1))
9897 code = reverse_condition_maybe_unordered (code);
9899 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9901 /* Check for min operation. */
9902 if (code == LT || code == UNLE)
9910 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9911 if (memory_operand (op0, VOIDmode))
9912 op0 = force_reg (GET_MODE (operands[0]), op0);
9913 if (GET_MODE (operands[0]) == SFmode)
9914 emit_insn (gen_minsf3 (operands[0], op0, op1));
9916 emit_insn (gen_mindf3 (operands[0], op0, op1));
9919 /* Check for max operation. */
9920 if (code == GT || code == UNGE)
9928 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9929 if (memory_operand (op0, VOIDmode))
9930 op0 = force_reg (GET_MODE (operands[0]), op0);
9931 if (GET_MODE (operands[0]) == SFmode)
9932 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9934 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9938 /* Manage condition to be sse_comparison_operator. In case we are
9939 in non-ieee mode, try to canonicalize the destination operand
9940 to be first in the comparison - this helps reload to avoid extra
9942 if (!sse_comparison_operator (operands[1], VOIDmode)
9943 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9945 rtx tmp = ix86_compare_op0;
9946 ix86_compare_op0 = ix86_compare_op1;
9947 ix86_compare_op1 = tmp;
9948 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9949 VOIDmode, ix86_compare_op0,
9952 /* Similarly try to manage result to be first operand of conditional
9953 move. We also don't support the NE comparison on SSE, so try to
9955 if ((rtx_equal_p (operands[0], operands[3])
9956 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9957 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9959 rtx tmp = operands[2];
9960 operands[2] = operands[3];
9962 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9963 (GET_CODE (operands[1])),
9964 VOIDmode, ix86_compare_op0,
9967 if (GET_MODE (operands[0]) == SFmode)
9968 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9969 operands[2], operands[3],
9970 ix86_compare_op0, ix86_compare_op1));
9972 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9973 operands[2], operands[3],
9974 ix86_compare_op0, ix86_compare_op1));
9978 /* The floating point conditional move instructions don't directly
9979 support conditions resulting from a signed integer comparison. */
9981 code = GET_CODE (operands[1]);
9982 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9984 /* The floating point conditional move instructions don't directly
9985 support signed integer comparisons. */
9987 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9989 if (second_test != NULL || bypass_test != NULL)
9991 tmp = gen_reg_rtx (QImode);
9992 ix86_expand_setcc (code, tmp);
9994 ix86_compare_op0 = tmp;
9995 ix86_compare_op1 = const0_rtx;
9996 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9998 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10000 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10001 emit_move_insn (tmp, operands[3]);
10004 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10006 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10007 emit_move_insn (tmp, operands[2]);
10011 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10012 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10017 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10018 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10023 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10024 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10032 /* Expand conditional increment or decrement using adb/sbb instructions.
10033 The default case using setcc followed by the conditional move can be
10034 done by generic code. */
10036 ix86_expand_int_addcc (operands)
10039 enum rtx_code code = GET_CODE (operands[1]);
10041 rtx val = const0_rtx;
10043 if (operands[3] != const1_rtx
10044 && operands[3] != constm1_rtx)
10046 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10047 ix86_compare_op1, &compare_op))
10049 if (GET_CODE (compare_op) != LTU)
10051 if ((GET_CODE (compare_op) == LTU) == (operands[3] == constm1_rtx))
10053 switch (GET_MODE (operands[0]))
10056 emit_insn (gen_subqi3_carry (operands[0], operands[2], val));
10059 emit_insn (gen_subhi3_carry (operands[0], operands[2], val));
10062 emit_insn (gen_subsi3_carry (operands[0], operands[2], val));
10065 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val));
10073 switch (GET_MODE (operands[0]))
10076 emit_insn (gen_addqi3_carry (operands[0], operands[2], val));
10079 emit_insn (gen_addhi3_carry (operands[0], operands[2], val));
10082 emit_insn (gen_addsi3_carry (operands[0], operands[2], val));
10085 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val));
10091 return 1; /* DONE */
10095 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10096 works for floating pointer parameters and nonoffsetable memories.
10097 For pushes, it returns just stack offsets; the values will be saved
10098 in the right order. Maximally three parts are generated. */
10101 ix86_split_to_parts (operand, parts, mode)
10104 enum machine_mode mode;
10109 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10111 size = (GET_MODE_SIZE (mode) + 4) / 8;
10113 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10115 if (size < 2 || size > 3)
10118 /* Optimize constant pool reference to immediates. This is used by fp
10119 moves, that force all constants to memory to allow combining. */
10120 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10122 rtx tmp = maybe_get_pool_constant (operand);
10127 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10129 /* The only non-offsetable memories we handle are pushes. */
10130 if (! push_operand (operand, VOIDmode))
10133 operand = copy_rtx (operand);
10134 PUT_MODE (operand, Pmode);
10135 parts[0] = parts[1] = parts[2] = operand;
10137 else if (!TARGET_64BIT)
10139 if (mode == DImode)
10140 split_di (&operand, 1, &parts[0], &parts[1]);
10143 if (REG_P (operand))
10145 if (!reload_completed)
10147 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10148 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10150 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10152 else if (offsettable_memref_p (operand))
10154 operand = adjust_address (operand, SImode, 0);
10155 parts[0] = operand;
10156 parts[1] = adjust_address (operand, SImode, 4);
10158 parts[2] = adjust_address (operand, SImode, 8);
10160 else if (GET_CODE (operand) == CONST_DOUBLE)
10165 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10170 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10171 parts[2] = gen_int_mode (l[2], SImode);
10174 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10179 parts[1] = gen_int_mode (l[1], SImode);
10180 parts[0] = gen_int_mode (l[0], SImode);
10188 if (mode == TImode)
10189 split_ti (&operand, 1, &parts[0], &parts[1]);
10190 if (mode == XFmode || mode == TFmode)
10192 if (REG_P (operand))
10194 if (!reload_completed)
10196 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10197 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10199 else if (offsettable_memref_p (operand))
10201 operand = adjust_address (operand, DImode, 0);
10202 parts[0] = operand;
10203 parts[1] = adjust_address (operand, SImode, 8);
10205 else if (GET_CODE (operand) == CONST_DOUBLE)
10210 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10211 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10212 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10213 if (HOST_BITS_PER_WIDE_INT >= 64)
10216 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10217 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10220 parts[0] = immed_double_const (l[0], l[1], DImode);
10221 parts[1] = gen_int_mode (l[2], SImode);
10231 /* Emit insns to perform a move or push of DI, DF, and XF values.
10232 Return false when normal moves are needed; true when all required
10233 insns have been emitted. Operands 2-4 contain the input values
10234 int the correct order; operands 5-7 contain the output values. */
10237 ix86_split_long_move (operands)
10243 int collisions = 0;
10244 enum machine_mode mode = GET_MODE (operands[0]);
10246 /* The DFmode expanders may ask us to move double.
10247 For 64bit target this is single move. By hiding the fact
10248 here we simplify i386.md splitters. */
10249 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10251 /* Optimize constant pool reference to immediates. This is used by
10252 fp moves, that force all constants to memory to allow combining. */
10254 if (GET_CODE (operands[1]) == MEM
10255 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10256 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10257 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10258 if (push_operand (operands[0], VOIDmode))
10260 operands[0] = copy_rtx (operands[0]);
10261 PUT_MODE (operands[0], Pmode);
10264 operands[0] = gen_lowpart (DImode, operands[0]);
10265 operands[1] = gen_lowpart (DImode, operands[1]);
10266 emit_move_insn (operands[0], operands[1]);
10270 /* The only non-offsettable memory we handle is push. */
10271 if (push_operand (operands[0], VOIDmode))
10273 else if (GET_CODE (operands[0]) == MEM
10274 && ! offsettable_memref_p (operands[0]))
10277 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10278 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10280 /* When emitting push, take care for source operands on the stack. */
10281 if (push && GET_CODE (operands[1]) == MEM
10282 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10285 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10286 XEXP (part[1][2], 0));
10287 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10288 XEXP (part[1][1], 0));
10291 /* We need to do copy in the right order in case an address register
10292 of the source overlaps the destination. */
10293 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10295 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10297 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10300 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10303 /* Collision in the middle part can be handled by reordering. */
10304 if (collisions == 1 && nparts == 3
10305 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10308 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10309 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10312 /* If there are more collisions, we can't handle it by reordering.
10313 Do an lea to the last part and use only one colliding move. */
10314 else if (collisions > 1)
10317 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10318 XEXP (part[1][0], 0)));
10319 part[1][0] = change_address (part[1][0],
10320 TARGET_64BIT ? DImode : SImode,
10321 part[0][nparts - 1]);
10322 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10324 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10334 /* We use only first 12 bytes of TFmode value, but for pushing we
10335 are required to adjust stack as if we were pushing real 16byte
10337 if (mode == TFmode && !TARGET_64BIT)
10338 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10340 emit_move_insn (part[0][2], part[1][2]);
10345 /* In 64bit mode we don't have 32bit push available. In case this is
10346 register, it is OK - we will just use larger counterpart. We also
10347 retype memory - these comes from attempt to avoid REX prefix on
10348 moving of second half of TFmode value. */
10349 if (GET_MODE (part[1][1]) == SImode)
10351 if (GET_CODE (part[1][1]) == MEM)
10352 part[1][1] = adjust_address (part[1][1], DImode, 0);
10353 else if (REG_P (part[1][1]))
10354 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10357 if (GET_MODE (part[1][0]) == SImode)
10358 part[1][0] = part[1][1];
10361 emit_move_insn (part[0][1], part[1][1]);
10362 emit_move_insn (part[0][0], part[1][0]);
10366 /* Choose correct order to not overwrite the source before it is copied. */
10367 if ((REG_P (part[0][0])
10368 && REG_P (part[1][1])
10369 && (REGNO (part[0][0]) == REGNO (part[1][1])
10371 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10373 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10377 operands[2] = part[0][2];
10378 operands[3] = part[0][1];
10379 operands[4] = part[0][0];
10380 operands[5] = part[1][2];
10381 operands[6] = part[1][1];
10382 operands[7] = part[1][0];
10386 operands[2] = part[0][1];
10387 operands[3] = part[0][0];
10388 operands[5] = part[1][1];
10389 operands[6] = part[1][0];
10396 operands[2] = part[0][0];
10397 operands[3] = part[0][1];
10398 operands[4] = part[0][2];
10399 operands[5] = part[1][0];
10400 operands[6] = part[1][1];
10401 operands[7] = part[1][2];
10405 operands[2] = part[0][0];
10406 operands[3] = part[0][1];
10407 operands[5] = part[1][0];
10408 operands[6] = part[1][1];
10411 emit_move_insn (operands[2], operands[5]);
10412 emit_move_insn (operands[3], operands[6]);
10414 emit_move_insn (operands[4], operands[7]);
10420 ix86_split_ashldi (operands, scratch)
10421 rtx *operands, scratch;
10423 rtx low[2], high[2];
10426 if (GET_CODE (operands[2]) == CONST_INT)
10428 split_di (operands, 2, low, high);
10429 count = INTVAL (operands[2]) & 63;
10433 emit_move_insn (high[0], low[1]);
10434 emit_move_insn (low[0], const0_rtx);
10437 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10441 if (!rtx_equal_p (operands[0], operands[1]))
10442 emit_move_insn (operands[0], operands[1]);
10443 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10444 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10449 if (!rtx_equal_p (operands[0], operands[1]))
10450 emit_move_insn (operands[0], operands[1]);
10452 split_di (operands, 1, low, high);
10454 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10455 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10457 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10459 if (! no_new_pseudos)
10460 scratch = force_reg (SImode, const0_rtx);
10462 emit_move_insn (scratch, const0_rtx);
10464 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10468 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10473 ix86_split_ashrdi (operands, scratch)
10474 rtx *operands, scratch;
10476 rtx low[2], high[2];
10479 if (GET_CODE (operands[2]) == CONST_INT)
10481 split_di (operands, 2, low, high);
10482 count = INTVAL (operands[2]) & 63;
10486 emit_move_insn (low[0], high[1]);
10488 if (! reload_completed)
10489 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10492 emit_move_insn (high[0], low[0]);
10493 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10497 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10501 if (!rtx_equal_p (operands[0], operands[1]))
10502 emit_move_insn (operands[0], operands[1]);
10503 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10504 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10509 if (!rtx_equal_p (operands[0], operands[1]))
10510 emit_move_insn (operands[0], operands[1]);
10512 split_di (operands, 1, low, high);
10514 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10515 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10517 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10519 if (! no_new_pseudos)
10520 scratch = gen_reg_rtx (SImode);
10521 emit_move_insn (scratch, high[0]);
10522 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10523 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10527 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10532 ix86_split_lshrdi (operands, scratch)
10533 rtx *operands, scratch;
10535 rtx low[2], high[2];
10538 if (GET_CODE (operands[2]) == CONST_INT)
10540 split_di (operands, 2, low, high);
10541 count = INTVAL (operands[2]) & 63;
10545 emit_move_insn (low[0], high[1]);
10546 emit_move_insn (high[0], const0_rtx);
10549 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10553 if (!rtx_equal_p (operands[0], operands[1]))
10554 emit_move_insn (operands[0], operands[1]);
10555 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10556 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10561 if (!rtx_equal_p (operands[0], operands[1]))
10562 emit_move_insn (operands[0], operands[1]);
10564 split_di (operands, 1, low, high);
10566 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10567 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10569 /* Heh. By reversing the arguments, we can reuse this pattern. */
10570 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10572 if (! no_new_pseudos)
10573 scratch = force_reg (SImode, const0_rtx);
10575 emit_move_insn (scratch, const0_rtx);
10577 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10581 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10585 /* Helper function for the string operations below. Dest VARIABLE whether
10586 it is aligned to VALUE bytes. If true, jump to the label. */
10588 ix86_expand_aligntest (variable, value)
10592 rtx label = gen_label_rtx ();
10593 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10594 if (GET_MODE (variable) == DImode)
10595 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10597 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10598 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10603 /* Adjust COUNTER by the VALUE. */
10605 ix86_adjust_counter (countreg, value)
10607 HOST_WIDE_INT value;
10609 if (GET_MODE (countreg) == DImode)
10610 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10612 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10615 /* Zero extend possibly SImode EXP to Pmode register. */
10617 ix86_zero_extend_to_Pmode (exp)
10621 if (GET_MODE (exp) == VOIDmode)
10622 return force_reg (Pmode, exp);
10623 if (GET_MODE (exp) == Pmode)
10624 return copy_to_mode_reg (Pmode, exp);
10625 r = gen_reg_rtx (Pmode);
10626 emit_insn (gen_zero_extendsidi2 (r, exp));
10630 /* Expand string move (memcpy) operation. Use i386 string operations when
10631 profitable. expand_clrstr contains similar code. */
10633 ix86_expand_movstr (dst, src, count_exp, align_exp)
10634 rtx dst, src, count_exp, align_exp;
10636 rtx srcreg, destreg, countreg;
10637 enum machine_mode counter_mode;
10638 HOST_WIDE_INT align = 0;
10639 unsigned HOST_WIDE_INT count = 0;
10644 if (GET_CODE (align_exp) == CONST_INT)
10645 align = INTVAL (align_exp);
10647 /* This simple hack avoids all inlining code and simplifies code below. */
10648 if (!TARGET_ALIGN_STRINGOPS)
10651 if (GET_CODE (count_exp) == CONST_INT)
10652 count = INTVAL (count_exp);
10654 /* Figure out proper mode for counter. For 32bits it is always SImode,
10655 for 64bits use SImode when possible, otherwise DImode.
10656 Set count to number of bytes copied when known at compile time. */
10657 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10658 || x86_64_zero_extended_value (count_exp))
10659 counter_mode = SImode;
10661 counter_mode = DImode;
10663 if (counter_mode != SImode && counter_mode != DImode)
10666 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10667 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10669 emit_insn (gen_cld ());
10671 /* When optimizing for size emit simple rep ; movsb instruction for
10672 counts not divisible by 4. */
10674 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10676 countreg = ix86_zero_extend_to_Pmode (count_exp);
10678 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10679 destreg, srcreg, countreg));
10681 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10682 destreg, srcreg, countreg));
10685 /* For constant aligned (or small unaligned) copies use rep movsl
10686 followed by code copying the rest. For PentiumPro ensure 8 byte
10687 alignment to allow rep movsl acceleration. */
10689 else if (count != 0
10691 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10692 || optimize_size || count < (unsigned int) 64))
10694 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10695 if (count & ~(size - 1))
10697 countreg = copy_to_mode_reg (counter_mode,
10698 GEN_INT ((count >> (size == 4 ? 2 : 3))
10699 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10700 countreg = ix86_zero_extend_to_Pmode (countreg);
10704 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10705 destreg, srcreg, countreg));
10707 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10708 destreg, srcreg, countreg));
10711 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10712 destreg, srcreg, countreg));
10714 if (size == 8 && (count & 0x04))
10715 emit_insn (gen_strmovsi (destreg, srcreg));
10717 emit_insn (gen_strmovhi (destreg, srcreg));
10719 emit_insn (gen_strmovqi (destreg, srcreg));
10721 /* The generic code based on the glibc implementation:
10722 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10723 allowing accelerated copying there)
10724 - copy the data using rep movsl
10725 - copy the rest. */
10730 int desired_alignment = (TARGET_PENTIUMPRO
10731 && (count == 0 || count >= (unsigned int) 260)
10732 ? 8 : UNITS_PER_WORD);
10734 /* In case we don't know anything about the alignment, default to
10735 library version, since it is usually equally fast and result in
10738 Also emit call when we know that the count is large and call overhead
10739 will not be important. */
10740 if (!TARGET_INLINE_ALL_STRINGOPS
10741 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10747 if (TARGET_SINGLE_STRINGOP)
10748 emit_insn (gen_cld ());
10750 countreg2 = gen_reg_rtx (Pmode);
10751 countreg = copy_to_mode_reg (counter_mode, count_exp);
10753 /* We don't use loops to align destination and to copy parts smaller
10754 than 4 bytes, because gcc is able to optimize such code better (in
10755 the case the destination or the count really is aligned, gcc is often
10756 able to predict the branches) and also it is friendlier to the
10757 hardware branch prediction.
10759 Using loops is beneficial for generic case, because we can
10760 handle small counts using the loops. Many CPUs (such as Athlon)
10761 have large REP prefix setup costs.
10763 This is quite costy. Maybe we can revisit this decision later or
10764 add some customizability to this code. */
10766 if (count == 0 && align < desired_alignment)
10768 label = gen_label_rtx ();
10769 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10770 LEU, 0, counter_mode, 1, label);
10774 rtx label = ix86_expand_aligntest (destreg, 1);
10775 emit_insn (gen_strmovqi (destreg, srcreg));
10776 ix86_adjust_counter (countreg, 1);
10777 emit_label (label);
10778 LABEL_NUSES (label) = 1;
10782 rtx label = ix86_expand_aligntest (destreg, 2);
10783 emit_insn (gen_strmovhi (destreg, srcreg));
10784 ix86_adjust_counter (countreg, 2);
10785 emit_label (label);
10786 LABEL_NUSES (label) = 1;
10788 if (align <= 4 && desired_alignment > 4)
10790 rtx label = ix86_expand_aligntest (destreg, 4);
10791 emit_insn (gen_strmovsi (destreg, srcreg));
10792 ix86_adjust_counter (countreg, 4);
10793 emit_label (label);
10794 LABEL_NUSES (label) = 1;
10797 if (label && desired_alignment > 4 && !TARGET_64BIT)
10799 emit_label (label);
10800 LABEL_NUSES (label) = 1;
10803 if (!TARGET_SINGLE_STRINGOP)
10804 emit_insn (gen_cld ());
10807 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10809 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10810 destreg, srcreg, countreg2));
10814 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10815 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10816 destreg, srcreg, countreg2));
10821 emit_label (label);
10822 LABEL_NUSES (label) = 1;
10824 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10825 emit_insn (gen_strmovsi (destreg, srcreg));
10826 if ((align <= 4 || count == 0) && TARGET_64BIT)
10828 rtx label = ix86_expand_aligntest (countreg, 4);
10829 emit_insn (gen_strmovsi (destreg, srcreg));
10830 emit_label (label);
10831 LABEL_NUSES (label) = 1;
10833 if (align > 2 && count != 0 && (count & 2))
10834 emit_insn (gen_strmovhi (destreg, srcreg));
10835 if (align <= 2 || count == 0)
10837 rtx label = ix86_expand_aligntest (countreg, 2);
10838 emit_insn (gen_strmovhi (destreg, srcreg));
10839 emit_label (label);
10840 LABEL_NUSES (label) = 1;
10842 if (align > 1 && count != 0 && (count & 1))
10843 emit_insn (gen_strmovqi (destreg, srcreg));
10844 if (align <= 1 || count == 0)
10846 rtx label = ix86_expand_aligntest (countreg, 1);
10847 emit_insn (gen_strmovqi (destreg, srcreg));
10848 emit_label (label);
10849 LABEL_NUSES (label) = 1;
10853 insns = get_insns ();
10856 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10861 /* Expand string clear operation (bzero). Use i386 string operations when
10862 profitable. expand_movstr contains similar code. */
10864 ix86_expand_clrstr (src, count_exp, align_exp)
10865 rtx src, count_exp, align_exp;
10867 rtx destreg, zeroreg, countreg;
10868 enum machine_mode counter_mode;
10869 HOST_WIDE_INT align = 0;
10870 unsigned HOST_WIDE_INT count = 0;
10872 if (GET_CODE (align_exp) == CONST_INT)
10873 align = INTVAL (align_exp);
10875 /* This simple hack avoids all inlining code and simplifies code below. */
10876 if (!TARGET_ALIGN_STRINGOPS)
10879 if (GET_CODE (count_exp) == CONST_INT)
10880 count = INTVAL (count_exp);
10881 /* Figure out proper mode for counter. For 32bits it is always SImode,
10882 for 64bits use SImode when possible, otherwise DImode.
10883 Set count to number of bytes copied when known at compile time. */
10884 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10885 || x86_64_zero_extended_value (count_exp))
10886 counter_mode = SImode;
10888 counter_mode = DImode;
10890 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10892 emit_insn (gen_cld ());
10894 /* When optimizing for size emit simple rep ; movsb instruction for
10895 counts not divisible by 4. */
10897 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10899 countreg = ix86_zero_extend_to_Pmode (count_exp);
10900 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10902 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10903 destreg, countreg));
10905 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10906 destreg, countreg));
10908 else if (count != 0
10910 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10911 || optimize_size || count < (unsigned int) 64))
10913 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10914 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10915 if (count & ~(size - 1))
10917 countreg = copy_to_mode_reg (counter_mode,
10918 GEN_INT ((count >> (size == 4 ? 2 : 3))
10919 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10920 countreg = ix86_zero_extend_to_Pmode (countreg);
10924 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10925 destreg, countreg));
10927 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10928 destreg, countreg));
10931 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10932 destreg, countreg));
10934 if (size == 8 && (count & 0x04))
10935 emit_insn (gen_strsetsi (destreg,
10936 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10938 emit_insn (gen_strsethi (destreg,
10939 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10941 emit_insn (gen_strsetqi (destreg,
10942 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10948 /* Compute desired alignment of the string operation. */
10949 int desired_alignment = (TARGET_PENTIUMPRO
10950 && (count == 0 || count >= (unsigned int) 260)
10951 ? 8 : UNITS_PER_WORD);
10953 /* In case we don't know anything about the alignment, default to
10954 library version, since it is usually equally fast and result in
10957 Also emit call when we know that the count is large and call overhead
10958 will not be important. */
10959 if (!TARGET_INLINE_ALL_STRINGOPS
10960 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10963 if (TARGET_SINGLE_STRINGOP)
10964 emit_insn (gen_cld ());
10966 countreg2 = gen_reg_rtx (Pmode);
10967 countreg = copy_to_mode_reg (counter_mode, count_exp);
10968 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10970 if (count == 0 && align < desired_alignment)
10972 label = gen_label_rtx ();
10973 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10974 LEU, 0, counter_mode, 1, label);
10978 rtx label = ix86_expand_aligntest (destreg, 1);
10979 emit_insn (gen_strsetqi (destreg,
10980 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10981 ix86_adjust_counter (countreg, 1);
10982 emit_label (label);
10983 LABEL_NUSES (label) = 1;
10987 rtx label = ix86_expand_aligntest (destreg, 2);
10988 emit_insn (gen_strsethi (destreg,
10989 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10990 ix86_adjust_counter (countreg, 2);
10991 emit_label (label);
10992 LABEL_NUSES (label) = 1;
10994 if (align <= 4 && desired_alignment > 4)
10996 rtx label = ix86_expand_aligntest (destreg, 4);
10997 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10998 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11000 ix86_adjust_counter (countreg, 4);
11001 emit_label (label);
11002 LABEL_NUSES (label) = 1;
11005 if (label && desired_alignment > 4 && !TARGET_64BIT)
11007 emit_label (label);
11008 LABEL_NUSES (label) = 1;
11012 if (!TARGET_SINGLE_STRINGOP)
11013 emit_insn (gen_cld ());
11016 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11018 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11019 destreg, countreg2));
11023 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11024 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11025 destreg, countreg2));
11029 emit_label (label);
11030 LABEL_NUSES (label) = 1;
11033 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11034 emit_insn (gen_strsetsi (destreg,
11035 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11036 if (TARGET_64BIT && (align <= 4 || count == 0))
11038 rtx label = ix86_expand_aligntest (countreg, 4);
11039 emit_insn (gen_strsetsi (destreg,
11040 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11041 emit_label (label);
11042 LABEL_NUSES (label) = 1;
11044 if (align > 2 && count != 0 && (count & 2))
11045 emit_insn (gen_strsethi (destreg,
11046 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11047 if (align <= 2 || count == 0)
11049 rtx label = ix86_expand_aligntest (countreg, 2);
11050 emit_insn (gen_strsethi (destreg,
11051 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11052 emit_label (label);
11053 LABEL_NUSES (label) = 1;
11055 if (align > 1 && count != 0 && (count & 1))
11056 emit_insn (gen_strsetqi (destreg,
11057 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11058 if (align <= 1 || count == 0)
11060 rtx label = ix86_expand_aligntest (countreg, 1);
11061 emit_insn (gen_strsetqi (destreg,
11062 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11063 emit_label (label);
11064 LABEL_NUSES (label) = 1;
11069 /* Expand strlen. */
11071 ix86_expand_strlen (out, src, eoschar, align)
11072 rtx out, src, eoschar, align;
11074 rtx addr, scratch1, scratch2, scratch3, scratch4;
11076 /* The generic case of strlen expander is long. Avoid it's
11077 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11079 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11080 && !TARGET_INLINE_ALL_STRINGOPS
11082 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11085 addr = force_reg (Pmode, XEXP (src, 0));
11086 scratch1 = gen_reg_rtx (Pmode);
11088 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11091 /* Well it seems that some optimizer does not combine a call like
11092 foo(strlen(bar), strlen(bar));
11093 when the move and the subtraction is done here. It does calculate
11094 the length just once when these instructions are done inside of
11095 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11096 often used and I use one fewer register for the lifetime of
11097 output_strlen_unroll() this is better. */
11099 emit_move_insn (out, addr);
11101 ix86_expand_strlensi_unroll_1 (out, align);
11103 /* strlensi_unroll_1 returns the address of the zero at the end of
11104 the string, like memchr(), so compute the length by subtracting
11105 the start address. */
11107 emit_insn (gen_subdi3 (out, out, addr));
11109 emit_insn (gen_subsi3 (out, out, addr));
11113 scratch2 = gen_reg_rtx (Pmode);
11114 scratch3 = gen_reg_rtx (Pmode);
11115 scratch4 = force_reg (Pmode, constm1_rtx);
11117 emit_move_insn (scratch3, addr);
11118 eoschar = force_reg (QImode, eoschar);
11120 emit_insn (gen_cld ());
11123 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11124 align, scratch4, scratch3));
11125 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11126 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11130 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11131 align, scratch4, scratch3));
11132 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11133 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11139 /* Expand the appropriate insns for doing strlen if not just doing
11142 out = result, initialized with the start address
11143 align_rtx = alignment of the address.
11144 scratch = scratch register, initialized with the startaddress when
11145 not aligned, otherwise undefined
11147 This is just the body. It needs the initialisations mentioned above and
11148 some address computing at the end. These things are done in i386.md. */
11151 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11152 rtx out, align_rtx;
11156 rtx align_2_label = NULL_RTX;
11157 rtx align_3_label = NULL_RTX;
11158 rtx align_4_label = gen_label_rtx ();
11159 rtx end_0_label = gen_label_rtx ();
11161 rtx tmpreg = gen_reg_rtx (SImode);
11162 rtx scratch = gen_reg_rtx (SImode);
11165 if (GET_CODE (align_rtx) == CONST_INT)
11166 align = INTVAL (align_rtx);
11168 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11170 /* Is there a known alignment and is it less than 4? */
11173 rtx scratch1 = gen_reg_rtx (Pmode);
11174 emit_move_insn (scratch1, out);
11175 /* Is there a known alignment and is it not 2? */
11178 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11179 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11181 /* Leave just the 3 lower bits. */
11182 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11183 NULL_RTX, 0, OPTAB_WIDEN);
11185 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11186 Pmode, 1, align_4_label);
11187 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11188 Pmode, 1, align_2_label);
11189 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11190 Pmode, 1, align_3_label);
11194 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11195 check if is aligned to 4 - byte. */
11197 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11198 NULL_RTX, 0, OPTAB_WIDEN);
11200 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11201 Pmode, 1, align_4_label);
11204 mem = gen_rtx_MEM (QImode, out);
11206 /* Now compare the bytes. */
11208 /* Compare the first n unaligned byte on a byte per byte basis. */
11209 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11210 QImode, 1, end_0_label);
11212 /* Increment the address. */
11214 emit_insn (gen_adddi3 (out, out, const1_rtx));
11216 emit_insn (gen_addsi3 (out, out, const1_rtx));
11218 /* Not needed with an alignment of 2 */
11221 emit_label (align_2_label);
11223 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11227 emit_insn (gen_adddi3 (out, out, const1_rtx));
11229 emit_insn (gen_addsi3 (out, out, const1_rtx));
11231 emit_label (align_3_label);
11234 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11238 emit_insn (gen_adddi3 (out, out, const1_rtx));
11240 emit_insn (gen_addsi3 (out, out, const1_rtx));
11243 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11244 align this loop. It gives only huge programs, but does not help to
11246 emit_label (align_4_label);
11248 mem = gen_rtx_MEM (SImode, out);
11249 emit_move_insn (scratch, mem);
11251 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11253 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11255 /* This formula yields a nonzero result iff one of the bytes is zero.
11256 This saves three branches inside loop and many cycles. */
11258 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11259 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11260 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11261 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11262 gen_int_mode (0x80808080, SImode)));
11263 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11268 rtx reg = gen_reg_rtx (SImode);
11269 rtx reg2 = gen_reg_rtx (Pmode);
11270 emit_move_insn (reg, tmpreg);
11271 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11273 /* If zero is not in the first two bytes, move two bytes forward. */
11274 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11275 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11276 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11277 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11278 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11281 /* Emit lea manually to avoid clobbering of flags. */
11282 emit_insn (gen_rtx_SET (SImode, reg2,
11283 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11285 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11286 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11287 emit_insn (gen_rtx_SET (VOIDmode, out,
11288 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11295 rtx end_2_label = gen_label_rtx ();
11296 /* Is zero in the first two bytes? */
11298 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11299 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11300 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11301 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11302 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11304 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11305 JUMP_LABEL (tmp) = end_2_label;
11307 /* Not in the first two. Move two bytes forward. */
11308 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11310 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11312 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11314 emit_label (end_2_label);
11318 /* Avoid branch in fixing the byte. */
11319 tmpreg = gen_lowpart (QImode, tmpreg);
11320 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11322 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11324 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11326 emit_label (end_0_label);
11330 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11331 rtx retval, fnaddr, callarg1, callarg2, pop;
11334 rtx use = NULL, call;
11336 if (pop == const0_rtx)
11338 if (TARGET_64BIT && pop)
11342 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11343 fnaddr = machopic_indirect_call_target (fnaddr);
11345 /* Static functions and indirect calls don't need the pic register. */
11346 if (! TARGET_64BIT && flag_pic
11347 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11348 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11349 use_reg (&use, pic_offset_table_rtx);
11351 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11353 rtx al = gen_rtx_REG (QImode, 0);
11354 emit_move_insn (al, callarg2);
11355 use_reg (&use, al);
11357 #endif /* TARGET_MACHO */
11359 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11361 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11362 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11364 if (sibcall && TARGET_64BIT
11365 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11368 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11369 fnaddr = gen_rtx_REG (Pmode, 40);
11370 emit_move_insn (fnaddr, addr);
11371 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11374 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11376 call = gen_rtx_SET (VOIDmode, retval, call);
11379 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11380 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11381 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11384 call = emit_call_insn (call);
11386 CALL_INSN_FUNCTION_USAGE (call) = use;
11390 /* Clear stack slot assignments remembered from previous functions.
11391 This is called from INIT_EXPANDERS once before RTL is emitted for each
11394 static struct machine_function *
11395 ix86_init_machine_status ()
11397 return ggc_alloc_cleared (sizeof (struct machine_function));
11400 /* Return a MEM corresponding to a stack slot with mode MODE.
11401 Allocate a new slot if necessary.
11403 The RTL for a function can have several slots available: N is
11404 which slot to use. */
11407 assign_386_stack_local (mode, n)
11408 enum machine_mode mode;
11411 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11414 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11415 ix86_stack_locals[(int) mode][n]
11416 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11418 return ix86_stack_locals[(int) mode][n];
11421 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11423 static GTY(()) rtx ix86_tls_symbol;
11425 ix86_tls_get_addr ()
11428 if (!ix86_tls_symbol)
11430 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11431 (TARGET_GNU_TLS && !TARGET_64BIT)
11432 ? "___tls_get_addr"
11433 : "__tls_get_addr");
11436 return ix86_tls_symbol;
11439 /* Calculate the length of the memory address in the instruction
11440 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11443 memory_address_length (addr)
11446 struct ix86_address parts;
11447 rtx base, index, disp;
11450 if (GET_CODE (addr) == PRE_DEC
11451 || GET_CODE (addr) == POST_INC
11452 || GET_CODE (addr) == PRE_MODIFY
11453 || GET_CODE (addr) == POST_MODIFY)
11456 if (! ix86_decompose_address (addr, &parts))
11460 index = parts.index;
11464 /* Register Indirect. */
11465 if (base && !index && !disp)
11467 /* Special cases: ebp and esp need the two-byte modrm form. */
11468 if (addr == stack_pointer_rtx
11469 || addr == arg_pointer_rtx
11470 || addr == frame_pointer_rtx
11471 || addr == hard_frame_pointer_rtx)
11475 /* Direct Addressing. */
11476 else if (disp && !base && !index)
11481 /* Find the length of the displacement constant. */
11484 if (GET_CODE (disp) == CONST_INT
11485 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11491 /* An index requires the two-byte modrm form. */
11499 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11500 is set, expect that insn have 8bit immediate alternative. */
11502 ix86_attr_length_immediate_default (insn, shortform)
11508 extract_insn_cached (insn);
11509 for (i = recog_data.n_operands - 1; i >= 0; --i)
11510 if (CONSTANT_P (recog_data.operand[i]))
11515 && GET_CODE (recog_data.operand[i]) == CONST_INT
11516 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11520 switch (get_attr_mode (insn))
11531 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11536 fatal_insn ("unknown insn mode", insn);
11542 /* Compute default value for "length_address" attribute. */
11544 ix86_attr_length_address_default (insn)
11548 extract_insn_cached (insn);
11549 for (i = recog_data.n_operands - 1; i >= 0; --i)
11550 if (GET_CODE (recog_data.operand[i]) == MEM)
11552 return memory_address_length (XEXP (recog_data.operand[i], 0));
11558 /* Return the maximum number of instructions a cpu can issue. */
11565 case PROCESSOR_PENTIUM:
11569 case PROCESSOR_PENTIUMPRO:
11570 case PROCESSOR_PENTIUM4:
11571 case PROCESSOR_ATHLON:
11580 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11581 by DEP_INSN and nothing set by DEP_INSN. */
11584 ix86_flags_dependant (insn, dep_insn, insn_type)
11585 rtx insn, dep_insn;
11586 enum attr_type insn_type;
11590 /* Simplify the test for uninteresting insns. */
11591 if (insn_type != TYPE_SETCC
11592 && insn_type != TYPE_ICMOV
11593 && insn_type != TYPE_FCMOV
11594 && insn_type != TYPE_IBR)
11597 if ((set = single_set (dep_insn)) != 0)
11599 set = SET_DEST (set);
11602 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11603 && XVECLEN (PATTERN (dep_insn), 0) == 2
11604 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11605 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11607 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11608 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11613 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11616 /* This test is true if the dependent insn reads the flags but
11617 not any other potentially set register. */
11618 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11621 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11627 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11628 address with operands set by DEP_INSN. */
11631 ix86_agi_dependant (insn, dep_insn, insn_type)
11632 rtx insn, dep_insn;
11633 enum attr_type insn_type;
11637 if (insn_type == TYPE_LEA
11640 addr = PATTERN (insn);
11641 if (GET_CODE (addr) == SET)
11643 else if (GET_CODE (addr) == PARALLEL
11644 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11645 addr = XVECEXP (addr, 0, 0);
11648 addr = SET_SRC (addr);
11653 extract_insn_cached (insn);
11654 for (i = recog_data.n_operands - 1; i >= 0; --i)
11655 if (GET_CODE (recog_data.operand[i]) == MEM)
11657 addr = XEXP (recog_data.operand[i], 0);
11664 return modified_in_p (addr, dep_insn);
11668 ix86_adjust_cost (insn, link, dep_insn, cost)
11669 rtx insn, link, dep_insn;
11672 enum attr_type insn_type, dep_insn_type;
11673 enum attr_memory memory, dep_memory;
11675 int dep_insn_code_number;
11677 /* Anti and output dependencies have zero cost on all CPUs. */
11678 if (REG_NOTE_KIND (link) != 0)
11681 dep_insn_code_number = recog_memoized (dep_insn);
11683 /* If we can't recognize the insns, we can't really do anything. */
11684 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11687 insn_type = get_attr_type (insn);
11688 dep_insn_type = get_attr_type (dep_insn);
11692 case PROCESSOR_PENTIUM:
11693 /* Address Generation Interlock adds a cycle of latency. */
11694 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11697 /* ??? Compares pair with jump/setcc. */
11698 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11701 /* Floating point stores require value to be ready one cycle earlier. */
11702 if (insn_type == TYPE_FMOV
11703 && get_attr_memory (insn) == MEMORY_STORE
11704 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11708 case PROCESSOR_PENTIUMPRO:
11709 memory = get_attr_memory (insn);
11710 dep_memory = get_attr_memory (dep_insn);
11712 /* Since we can't represent delayed latencies of load+operation,
11713 increase the cost here for non-imov insns. */
11714 if (dep_insn_type != TYPE_IMOV
11715 && dep_insn_type != TYPE_FMOV
11716 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11719 /* INT->FP conversion is expensive. */
11720 if (get_attr_fp_int_src (dep_insn))
11723 /* There is one cycle extra latency between an FP op and a store. */
11724 if (insn_type == TYPE_FMOV
11725 && (set = single_set (dep_insn)) != NULL_RTX
11726 && (set2 = single_set (insn)) != NULL_RTX
11727 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11728 && GET_CODE (SET_DEST (set2)) == MEM)
11731 /* Show ability of reorder buffer to hide latency of load by executing
11732 in parallel with previous instruction in case
11733 previous instruction is not needed to compute the address. */
11734 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11735 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11737 /* Claim moves to take one cycle, as core can issue one load
11738 at time and the next load can start cycle later. */
11739 if (dep_insn_type == TYPE_IMOV
11740 || dep_insn_type == TYPE_FMOV)
11748 memory = get_attr_memory (insn);
11749 dep_memory = get_attr_memory (dep_insn);
11750 /* The esp dependency is resolved before the instruction is really
11752 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11753 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11756 /* Since we can't represent delayed latencies of load+operation,
11757 increase the cost here for non-imov insns. */
11758 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11759 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11761 /* INT->FP conversion is expensive. */
11762 if (get_attr_fp_int_src (dep_insn))
11765 /* Show ability of reorder buffer to hide latency of load by executing
11766 in parallel with previous instruction in case
11767 previous instruction is not needed to compute the address. */
11768 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11769 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11771 /* Claim moves to take one cycle, as core can issue one load
11772 at time and the next load can start cycle later. */
11773 if (dep_insn_type == TYPE_IMOV
11774 || dep_insn_type == TYPE_FMOV)
11783 case PROCESSOR_ATHLON:
11785 memory = get_attr_memory (insn);
11786 dep_memory = get_attr_memory (dep_insn);
11788 /* Show ability of reorder buffer to hide latency of load by executing
11789 in parallel with previous instruction in case
11790 previous instruction is not needed to compute the address. */
11791 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11792 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11794 /* Claim moves to take one cycle, as core can issue one load
11795 at time and the next load can start cycle later. */
11796 if (dep_insn_type == TYPE_IMOV
11797 || dep_insn_type == TYPE_FMOV)
11799 else if (cost >= 3)
11814 struct ppro_sched_data
11817 int issued_this_cycle;
11821 static enum attr_ppro_uops
11822 ix86_safe_ppro_uops (insn)
11825 if (recog_memoized (insn) >= 0)
11826 return get_attr_ppro_uops (insn);
11828 return PPRO_UOPS_MANY;
11832 ix86_dump_ppro_packet (dump)
11835 if (ix86_sched_data.ppro.decode[0])
11837 fprintf (dump, "PPRO packet: %d",
11838 INSN_UID (ix86_sched_data.ppro.decode[0]));
11839 if (ix86_sched_data.ppro.decode[1])
11840 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11841 if (ix86_sched_data.ppro.decode[2])
11842 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11843 fputc ('\n', dump);
11847 /* We're beginning a new block. Initialize data structures as necessary. */
11850 ix86_sched_init (dump, sched_verbose, veclen)
11851 FILE *dump ATTRIBUTE_UNUSED;
11852 int sched_verbose ATTRIBUTE_UNUSED;
11853 int veclen ATTRIBUTE_UNUSED;
11855 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11858 /* Shift INSN to SLOT, and shift everything else down. */
11861 ix86_reorder_insn (insnp, slot)
11868 insnp[0] = insnp[1];
11869 while (++insnp != slot);
11875 ix86_sched_reorder_ppro (ready, e_ready)
11880 enum attr_ppro_uops cur_uops;
11881 int issued_this_cycle;
11885 /* At this point .ppro.decode contains the state of the three
11886 decoders from last "cycle". That is, those insns that were
11887 actually independent. But here we're scheduling for the
11888 decoder, and we may find things that are decodable in the
11891 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11892 issued_this_cycle = 0;
11895 cur_uops = ix86_safe_ppro_uops (*insnp);
11897 /* If the decoders are empty, and we've a complex insn at the
11898 head of the priority queue, let it issue without complaint. */
11899 if (decode[0] == NULL)
11901 if (cur_uops == PPRO_UOPS_MANY)
11903 decode[0] = *insnp;
11907 /* Otherwise, search for a 2-4 uop unsn to issue. */
11908 while (cur_uops != PPRO_UOPS_FEW)
11910 if (insnp == ready)
11912 cur_uops = ix86_safe_ppro_uops (*--insnp);
11915 /* If so, move it to the head of the line. */
11916 if (cur_uops == PPRO_UOPS_FEW)
11917 ix86_reorder_insn (insnp, e_ready);
11919 /* Issue the head of the queue. */
11920 issued_this_cycle = 1;
11921 decode[0] = *e_ready--;
11924 /* Look for simple insns to fill in the other two slots. */
11925 for (i = 1; i < 3; ++i)
11926 if (decode[i] == NULL)
11928 if (ready > e_ready)
11932 cur_uops = ix86_safe_ppro_uops (*insnp);
11933 while (cur_uops != PPRO_UOPS_ONE)
11935 if (insnp == ready)
11937 cur_uops = ix86_safe_ppro_uops (*--insnp);
11940 /* Found one. Move it to the head of the queue and issue it. */
11941 if (cur_uops == PPRO_UOPS_ONE)
11943 ix86_reorder_insn (insnp, e_ready);
11944 decode[i] = *e_ready--;
11945 issued_this_cycle++;
11949 /* ??? Didn't find one. Ideally, here we would do a lazy split
11950 of 2-uop insns, issue one and queue the other. */
11954 if (issued_this_cycle == 0)
11955 issued_this_cycle = 1;
11956 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11959 /* We are about to being issuing insns for this clock cycle.
11960 Override the default sort algorithm to better slot instructions. */
11962 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11963 FILE *dump ATTRIBUTE_UNUSED;
11964 int sched_verbose ATTRIBUTE_UNUSED;
11967 int clock_var ATTRIBUTE_UNUSED;
11969 int n_ready = *n_readyp;
11970 rtx *e_ready = ready + n_ready - 1;
11972 /* Make sure to go ahead and initialize key items in
11973 ix86_sched_data if we are not going to bother trying to
11974 reorder the ready queue. */
11977 ix86_sched_data.ppro.issued_this_cycle = 1;
11986 case PROCESSOR_PENTIUMPRO:
11987 ix86_sched_reorder_ppro (ready, e_ready);
11992 return ix86_issue_rate ();
11995 /* We are about to issue INSN. Return the number of insns left on the
11996 ready queue that can be issued this cycle. */
11999 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12003 int can_issue_more;
12009 return can_issue_more - 1;
12011 case PROCESSOR_PENTIUMPRO:
12013 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12015 if (uops == PPRO_UOPS_MANY)
12018 ix86_dump_ppro_packet (dump);
12019 ix86_sched_data.ppro.decode[0] = insn;
12020 ix86_sched_data.ppro.decode[1] = NULL;
12021 ix86_sched_data.ppro.decode[2] = NULL;
12023 ix86_dump_ppro_packet (dump);
12024 ix86_sched_data.ppro.decode[0] = NULL;
12026 else if (uops == PPRO_UOPS_FEW)
12029 ix86_dump_ppro_packet (dump);
12030 ix86_sched_data.ppro.decode[0] = insn;
12031 ix86_sched_data.ppro.decode[1] = NULL;
12032 ix86_sched_data.ppro.decode[2] = NULL;
12036 for (i = 0; i < 3; ++i)
12037 if (ix86_sched_data.ppro.decode[i] == NULL)
12039 ix86_sched_data.ppro.decode[i] = insn;
12047 ix86_dump_ppro_packet (dump);
12048 ix86_sched_data.ppro.decode[0] = NULL;
12049 ix86_sched_data.ppro.decode[1] = NULL;
12050 ix86_sched_data.ppro.decode[2] = NULL;
12054 return --ix86_sched_data.ppro.issued_this_cycle;
12059 ia32_use_dfa_pipeline_interface ()
12061 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12066 /* How many alternative schedules to try. This should be as wide as the
12067 scheduling freedom in the DFA, but no wider. Making this value too
12068 large results extra work for the scheduler. */
12071 ia32_multipass_dfa_lookahead ()
12073 if (ix86_cpu == PROCESSOR_PENTIUM)
12080 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12081 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12085 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12087 rtx dstref, srcref, dstreg, srcreg;
12091 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12093 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12097 /* Subroutine of above to actually do the updating by recursively walking
12101 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12103 rtx dstref, srcref, dstreg, srcreg;
12105 enum rtx_code code = GET_CODE (x);
12106 const char *format_ptr = GET_RTX_FORMAT (code);
12109 if (code == MEM && XEXP (x, 0) == dstreg)
12110 MEM_COPY_ATTRIBUTES (x, dstref);
12111 else if (code == MEM && XEXP (x, 0) == srcreg)
12112 MEM_COPY_ATTRIBUTES (x, srcref);
12114 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12116 if (*format_ptr == 'e')
12117 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12119 else if (*format_ptr == 'E')
12120 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12121 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12126 /* Compute the alignment given to a constant that is being placed in memory.
12127 EXP is the constant and ALIGN is the alignment that the object would
12129 The value of this function is used instead of that alignment to align
12133 ix86_constant_alignment (exp, align)
12137 if (TREE_CODE (exp) == REAL_CST)
12139 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12141 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12144 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12151 /* Compute the alignment for a static variable.
12152 TYPE is the data type, and ALIGN is the alignment that
12153 the object would ordinarily have. The value of this function is used
12154 instead of that alignment to align the object. */
12157 ix86_data_alignment (type, align)
12161 if (AGGREGATE_TYPE_P (type)
12162 && TYPE_SIZE (type)
12163 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12164 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12165 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12168 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12169 to 16byte boundary. */
12172 if (AGGREGATE_TYPE_P (type)
12173 && TYPE_SIZE (type)
12174 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12175 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12176 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12180 if (TREE_CODE (type) == ARRAY_TYPE)
12182 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12184 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12187 else if (TREE_CODE (type) == COMPLEX_TYPE)
12190 if (TYPE_MODE (type) == DCmode && align < 64)
12192 if (TYPE_MODE (type) == XCmode && align < 128)
12195 else if ((TREE_CODE (type) == RECORD_TYPE
12196 || TREE_CODE (type) == UNION_TYPE
12197 || TREE_CODE (type) == QUAL_UNION_TYPE)
12198 && TYPE_FIELDS (type))
12200 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12202 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12205 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12206 || TREE_CODE (type) == INTEGER_TYPE)
12208 if (TYPE_MODE (type) == DFmode && align < 64)
12210 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12217 /* Compute the alignment for a local variable.
12218 TYPE is the data type, and ALIGN is the alignment that
12219 the object would ordinarily have. The value of this macro is used
12220 instead of that alignment to align the object. */
12223 ix86_local_alignment (type, align)
12227 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12228 to 16byte boundary. */
12231 if (AGGREGATE_TYPE_P (type)
12232 && TYPE_SIZE (type)
12233 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12234 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12235 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12238 if (TREE_CODE (type) == ARRAY_TYPE)
12240 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12242 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12245 else if (TREE_CODE (type) == COMPLEX_TYPE)
12247 if (TYPE_MODE (type) == DCmode && align < 64)
12249 if (TYPE_MODE (type) == XCmode && align < 128)
12252 else if ((TREE_CODE (type) == RECORD_TYPE
12253 || TREE_CODE (type) == UNION_TYPE
12254 || TREE_CODE (type) == QUAL_UNION_TYPE)
12255 && TYPE_FIELDS (type))
12257 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12259 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12262 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12263 || TREE_CODE (type) == INTEGER_TYPE)
12266 if (TYPE_MODE (type) == DFmode && align < 64)
12268 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12274 /* Emit RTL insns to initialize the variable parts of a trampoline.
12275 FNADDR is an RTX for the address of the function's pure code.
12276 CXT is an RTX for the static chain value for the function. */
12278 x86_initialize_trampoline (tramp, fnaddr, cxt)
12279 rtx tramp, fnaddr, cxt;
12283 /* Compute offset from the end of the jmp to the target function. */
12284 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12285 plus_constant (tramp, 10),
12286 NULL_RTX, 1, OPTAB_DIRECT);
12287 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12288 gen_int_mode (0xb9, QImode));
12289 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12290 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12291 gen_int_mode (0xe9, QImode));
12292 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12297 /* Try to load address using shorter movl instead of movabs.
12298 We may want to support movq for kernel mode, but kernel does not use
12299 trampolines at the moment. */
12300 if (x86_64_zero_extended_value (fnaddr))
12302 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12303 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12304 gen_int_mode (0xbb41, HImode));
12305 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12306 gen_lowpart (SImode, fnaddr));
12311 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12312 gen_int_mode (0xbb49, HImode));
12313 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12317 /* Load static chain using movabs to r10. */
12318 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12319 gen_int_mode (0xba49, HImode));
12320 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12323 /* Jump to the r11 */
12324 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12325 gen_int_mode (0xff49, HImode));
12326 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12327 gen_int_mode (0xe3, QImode));
12329 if (offset > TRAMPOLINE_SIZE)
12333 #ifdef TRANSFER_FROM_TRAMPOLINE
12334 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12335 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12339 #define def_builtin(MASK, NAME, TYPE, CODE) \
12341 if ((MASK) & target_flags) \
12342 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12343 NULL, NULL_TREE); \
12346 struct builtin_description
12348 const unsigned int mask;
12349 const enum insn_code icode;
12350 const char *const name;
12351 const enum ix86_builtins code;
12352 const enum rtx_code comparison;
12353 const unsigned int flag;
12356 /* Used for builtins that are enabled both by -msse and -msse2. */
12357 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12359 static const struct builtin_description bdesc_comi[] =
12361 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12362 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12363 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12364 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12365 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12366 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12367 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12368 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12369 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12370 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12371 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12372 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12373 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12374 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12375 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12376 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12377 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12378 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12379 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12380 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12381 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12382 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12383 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12384 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12387 static const struct builtin_description bdesc_2arg[] =
12390 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12391 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12392 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12393 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12394 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12395 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12396 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12397 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12399 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12400 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12401 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12402 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12403 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12404 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12405 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12406 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12407 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12408 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12409 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12410 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12411 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12412 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12413 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12414 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12415 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12416 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12417 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12418 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12420 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12421 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12422 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12423 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12425 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12426 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12427 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12428 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12430 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12431 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12432 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12433 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12434 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12437 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12438 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12439 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12440 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12441 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12442 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12444 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12445 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12446 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12447 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12448 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12449 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12450 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12451 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12453 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12454 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12455 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12457 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12458 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12459 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12460 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12462 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12463 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12465 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12466 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12467 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12468 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12469 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12470 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12472 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12473 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12474 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12475 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12477 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12478 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12479 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12480 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12481 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12482 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12485 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12486 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12487 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12489 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12490 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12492 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12493 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12494 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12495 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12496 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12497 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12499 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12500 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12501 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12502 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12503 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12504 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12506 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12507 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12508 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12509 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12511 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12512 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12515 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12516 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12517 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12518 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12519 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12520 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12521 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12522 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12524 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12525 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12526 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12527 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12528 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12529 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12530 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12531 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12532 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12533 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12534 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12535 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12536 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12537 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12538 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12539 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12540 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12541 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12542 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12543 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12545 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12546 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12547 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12548 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12550 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12551 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12552 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12553 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12555 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12556 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12557 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12560 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12561 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12562 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12563 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12564 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12565 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12566 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12567 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12569 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12570 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12571 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12572 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12573 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12574 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12575 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12576 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12578 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12579 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12580 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12581 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12583 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12584 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12585 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12586 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12588 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12589 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12591 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12592 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12593 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12594 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12595 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12596 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12598 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12599 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12600 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12601 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12603 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12604 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12605 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12606 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12607 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12608 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12609 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12610 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12612 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12613 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12614 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12616 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12617 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12619 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12620 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12621 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12622 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12623 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12624 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12626 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12627 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12628 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12629 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12630 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12631 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12633 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12634 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12635 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12636 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12638 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12640 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12641 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12642 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12645 static const struct builtin_description bdesc_1arg[] =
12647 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12648 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12650 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12651 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12652 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12654 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12655 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12656 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12657 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12659 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12660 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12661 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12662 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12664 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12667 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12669 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12670 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12671 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12672 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12673 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12675 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12677 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12678 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12680 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12681 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12682 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12684 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12688 ix86_init_builtins ()
12691 ix86_init_mmx_sse_builtins ();
12694 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12695 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12698 ix86_init_mmx_sse_builtins ()
12700 const struct builtin_description * d;
12703 tree pchar_type_node = build_pointer_type (char_type_node);
12704 tree pcchar_type_node = build_pointer_type (
12705 build_type_variant (char_type_node, 1, 0));
12706 tree pfloat_type_node = build_pointer_type (float_type_node);
12707 tree pcfloat_type_node = build_pointer_type (
12708 build_type_variant (float_type_node, 1, 0));
12709 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12710 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12711 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12714 tree int_ftype_v4sf_v4sf
12715 = build_function_type_list (integer_type_node,
12716 V4SF_type_node, V4SF_type_node, NULL_TREE);
12717 tree v4si_ftype_v4sf_v4sf
12718 = build_function_type_list (V4SI_type_node,
12719 V4SF_type_node, V4SF_type_node, NULL_TREE);
12720 /* MMX/SSE/integer conversions. */
12721 tree int_ftype_v4sf
12722 = build_function_type_list (integer_type_node,
12723 V4SF_type_node, NULL_TREE);
12724 tree int_ftype_v8qi
12725 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12726 tree v4sf_ftype_v4sf_int
12727 = build_function_type_list (V4SF_type_node,
12728 V4SF_type_node, integer_type_node, NULL_TREE);
12729 tree v4sf_ftype_v4sf_v2si
12730 = build_function_type_list (V4SF_type_node,
12731 V4SF_type_node, V2SI_type_node, NULL_TREE);
12732 tree int_ftype_v4hi_int
12733 = build_function_type_list (integer_type_node,
12734 V4HI_type_node, integer_type_node, NULL_TREE);
12735 tree v4hi_ftype_v4hi_int_int
12736 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12737 integer_type_node, integer_type_node,
12739 /* Miscellaneous. */
12740 tree v8qi_ftype_v4hi_v4hi
12741 = build_function_type_list (V8QI_type_node,
12742 V4HI_type_node, V4HI_type_node, NULL_TREE);
12743 tree v4hi_ftype_v2si_v2si
12744 = build_function_type_list (V4HI_type_node,
12745 V2SI_type_node, V2SI_type_node, NULL_TREE);
12746 tree v4sf_ftype_v4sf_v4sf_int
12747 = build_function_type_list (V4SF_type_node,
12748 V4SF_type_node, V4SF_type_node,
12749 integer_type_node, NULL_TREE);
12750 tree v2si_ftype_v4hi_v4hi
12751 = build_function_type_list (V2SI_type_node,
12752 V4HI_type_node, V4HI_type_node, NULL_TREE);
12753 tree v4hi_ftype_v4hi_int
12754 = build_function_type_list (V4HI_type_node,
12755 V4HI_type_node, integer_type_node, NULL_TREE);
12756 tree v4hi_ftype_v4hi_di
12757 = build_function_type_list (V4HI_type_node,
12758 V4HI_type_node, long_long_unsigned_type_node,
12760 tree v2si_ftype_v2si_di
12761 = build_function_type_list (V2SI_type_node,
12762 V2SI_type_node, long_long_unsigned_type_node,
12764 tree void_ftype_void
12765 = build_function_type (void_type_node, void_list_node);
12766 tree void_ftype_unsigned
12767 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12768 tree unsigned_ftype_void
12769 = build_function_type (unsigned_type_node, void_list_node);
12771 = build_function_type (long_long_unsigned_type_node, void_list_node);
12772 tree v4sf_ftype_void
12773 = build_function_type (V4SF_type_node, void_list_node);
12774 tree v2si_ftype_v4sf
12775 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12776 /* Loads/stores. */
12777 tree void_ftype_v8qi_v8qi_pchar
12778 = build_function_type_list (void_type_node,
12779 V8QI_type_node, V8QI_type_node,
12780 pchar_type_node, NULL_TREE);
12781 tree v4sf_ftype_pcfloat
12782 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12783 /* @@@ the type is bogus */
12784 tree v4sf_ftype_v4sf_pv2si
12785 = build_function_type_list (V4SF_type_node,
12786 V4SF_type_node, pv2si_type_node, NULL_TREE);
12787 tree void_ftype_pv2si_v4sf
12788 = build_function_type_list (void_type_node,
12789 pv2si_type_node, V4SF_type_node, NULL_TREE);
12790 tree void_ftype_pfloat_v4sf
12791 = build_function_type_list (void_type_node,
12792 pfloat_type_node, V4SF_type_node, NULL_TREE);
12793 tree void_ftype_pdi_di
12794 = build_function_type_list (void_type_node,
12795 pdi_type_node, long_long_unsigned_type_node,
12797 tree void_ftype_pv2di_v2di
12798 = build_function_type_list (void_type_node,
12799 pv2di_type_node, V2DI_type_node, NULL_TREE);
12800 /* Normal vector unops. */
12801 tree v4sf_ftype_v4sf
12802 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12804 /* Normal vector binops. */
12805 tree v4sf_ftype_v4sf_v4sf
12806 = build_function_type_list (V4SF_type_node,
12807 V4SF_type_node, V4SF_type_node, NULL_TREE);
12808 tree v8qi_ftype_v8qi_v8qi
12809 = build_function_type_list (V8QI_type_node,
12810 V8QI_type_node, V8QI_type_node, NULL_TREE);
12811 tree v4hi_ftype_v4hi_v4hi
12812 = build_function_type_list (V4HI_type_node,
12813 V4HI_type_node, V4HI_type_node, NULL_TREE);
12814 tree v2si_ftype_v2si_v2si
12815 = build_function_type_list (V2SI_type_node,
12816 V2SI_type_node, V2SI_type_node, NULL_TREE);
12817 tree di_ftype_di_di
12818 = build_function_type_list (long_long_unsigned_type_node,
12819 long_long_unsigned_type_node,
12820 long_long_unsigned_type_node, NULL_TREE);
12822 tree v2si_ftype_v2sf
12823 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12824 tree v2sf_ftype_v2si
12825 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12826 tree v2si_ftype_v2si
12827 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12828 tree v2sf_ftype_v2sf
12829 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12830 tree v2sf_ftype_v2sf_v2sf
12831 = build_function_type_list (V2SF_type_node,
12832 V2SF_type_node, V2SF_type_node, NULL_TREE);
12833 tree v2si_ftype_v2sf_v2sf
12834 = build_function_type_list (V2SI_type_node,
12835 V2SF_type_node, V2SF_type_node, NULL_TREE);
12836 tree pint_type_node = build_pointer_type (integer_type_node);
12837 tree pcint_type_node = build_pointer_type (
12838 build_type_variant (integer_type_node, 1, 0));
12839 tree pdouble_type_node = build_pointer_type (double_type_node);
12840 tree pcdouble_type_node = build_pointer_type (
12841 build_type_variant (double_type_node, 1, 0));
12842 tree int_ftype_v2df_v2df
12843 = build_function_type_list (integer_type_node,
12844 V2DF_type_node, V2DF_type_node, NULL_TREE);
12847 = build_function_type (intTI_type_node, void_list_node);
12848 tree v2di_ftype_void
12849 = build_function_type (V2DI_type_node, void_list_node);
12850 tree ti_ftype_ti_ti
12851 = build_function_type_list (intTI_type_node,
12852 intTI_type_node, intTI_type_node, NULL_TREE);
12853 tree void_ftype_pcvoid
12854 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12856 = build_function_type_list (V2DI_type_node,
12857 long_long_unsigned_type_node, NULL_TREE);
12859 = build_function_type_list (long_long_unsigned_type_node,
12860 V2DI_type_node, NULL_TREE);
12861 tree v4sf_ftype_v4si
12862 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12863 tree v4si_ftype_v4sf
12864 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12865 tree v2df_ftype_v4si
12866 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12867 tree v4si_ftype_v2df
12868 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12869 tree v2si_ftype_v2df
12870 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12871 tree v4sf_ftype_v2df
12872 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12873 tree v2df_ftype_v2si
12874 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12875 tree v2df_ftype_v4sf
12876 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12877 tree int_ftype_v2df
12878 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12879 tree v2df_ftype_v2df_int
12880 = build_function_type_list (V2DF_type_node,
12881 V2DF_type_node, integer_type_node, NULL_TREE);
12882 tree v4sf_ftype_v4sf_v2df
12883 = build_function_type_list (V4SF_type_node,
12884 V4SF_type_node, V2DF_type_node, NULL_TREE);
12885 tree v2df_ftype_v2df_v4sf
12886 = build_function_type_list (V2DF_type_node,
12887 V2DF_type_node, V4SF_type_node, NULL_TREE);
12888 tree v2df_ftype_v2df_v2df_int
12889 = build_function_type_list (V2DF_type_node,
12890 V2DF_type_node, V2DF_type_node,
12893 tree v2df_ftype_v2df_pv2si
12894 = build_function_type_list (V2DF_type_node,
12895 V2DF_type_node, pv2si_type_node, NULL_TREE);
12896 tree void_ftype_pv2si_v2df
12897 = build_function_type_list (void_type_node,
12898 pv2si_type_node, V2DF_type_node, NULL_TREE);
12899 tree void_ftype_pdouble_v2df
12900 = build_function_type_list (void_type_node,
12901 pdouble_type_node, V2DF_type_node, NULL_TREE);
12902 tree void_ftype_pint_int
12903 = build_function_type_list (void_type_node,
12904 pint_type_node, integer_type_node, NULL_TREE);
12905 tree void_ftype_v16qi_v16qi_pchar
12906 = build_function_type_list (void_type_node,
12907 V16QI_type_node, V16QI_type_node,
12908 pchar_type_node, NULL_TREE);
12909 tree v2df_ftype_pcdouble
12910 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12911 tree v2df_ftype_v2df_v2df
12912 = build_function_type_list (V2DF_type_node,
12913 V2DF_type_node, V2DF_type_node, NULL_TREE);
12914 tree v16qi_ftype_v16qi_v16qi
12915 = build_function_type_list (V16QI_type_node,
12916 V16QI_type_node, V16QI_type_node, NULL_TREE);
12917 tree v8hi_ftype_v8hi_v8hi
12918 = build_function_type_list (V8HI_type_node,
12919 V8HI_type_node, V8HI_type_node, NULL_TREE);
12920 tree v4si_ftype_v4si_v4si
12921 = build_function_type_list (V4SI_type_node,
12922 V4SI_type_node, V4SI_type_node, NULL_TREE);
12923 tree v2di_ftype_v2di_v2di
12924 = build_function_type_list (V2DI_type_node,
12925 V2DI_type_node, V2DI_type_node, NULL_TREE);
12926 tree v2di_ftype_v2df_v2df
12927 = build_function_type_list (V2DI_type_node,
12928 V2DF_type_node, V2DF_type_node, NULL_TREE);
12929 tree v2df_ftype_v2df
12930 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12931 tree v2df_ftype_double
12932 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12933 tree v2df_ftype_double_double
12934 = build_function_type_list (V2DF_type_node,
12935 double_type_node, double_type_node, NULL_TREE);
12936 tree int_ftype_v8hi_int
12937 = build_function_type_list (integer_type_node,
12938 V8HI_type_node, integer_type_node, NULL_TREE);
12939 tree v8hi_ftype_v8hi_int_int
12940 = build_function_type_list (V8HI_type_node,
12941 V8HI_type_node, integer_type_node,
12942 integer_type_node, NULL_TREE);
12943 tree v2di_ftype_v2di_int
12944 = build_function_type_list (V2DI_type_node,
12945 V2DI_type_node, integer_type_node, NULL_TREE);
12946 tree v4si_ftype_v4si_int
12947 = build_function_type_list (V4SI_type_node,
12948 V4SI_type_node, integer_type_node, NULL_TREE);
12949 tree v8hi_ftype_v8hi_int
12950 = build_function_type_list (V8HI_type_node,
12951 V8HI_type_node, integer_type_node, NULL_TREE);
12952 tree v8hi_ftype_v8hi_v2di
12953 = build_function_type_list (V8HI_type_node,
12954 V8HI_type_node, V2DI_type_node, NULL_TREE);
12955 tree v4si_ftype_v4si_v2di
12956 = build_function_type_list (V4SI_type_node,
12957 V4SI_type_node, V2DI_type_node, NULL_TREE);
12958 tree v4si_ftype_v8hi_v8hi
12959 = build_function_type_list (V4SI_type_node,
12960 V8HI_type_node, V8HI_type_node, NULL_TREE);
12961 tree di_ftype_v8qi_v8qi
12962 = build_function_type_list (long_long_unsigned_type_node,
12963 V8QI_type_node, V8QI_type_node, NULL_TREE);
12964 tree v2di_ftype_v16qi_v16qi
12965 = build_function_type_list (V2DI_type_node,
12966 V16QI_type_node, V16QI_type_node, NULL_TREE);
12967 tree int_ftype_v16qi
12968 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12969 tree v16qi_ftype_pcchar
12970 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12971 tree void_ftype_pchar_v16qi
12972 = build_function_type_list (void_type_node,
12973 pchar_type_node, V16QI_type_node, NULL_TREE);
12974 tree v4si_ftype_pcint
12975 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12976 tree void_ftype_pcint_v4si
12977 = build_function_type_list (void_type_node,
12978 pcint_type_node, V4SI_type_node, NULL_TREE);
12979 tree v2di_ftype_v2di
12980 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12982 /* Add all builtins that are more or less simple operations on two
12984 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12986 /* Use one of the operands; the target can have a different mode for
12987 mask-generating compares. */
12988 enum machine_mode mode;
12993 mode = insn_data[d->icode].operand[1].mode;
12998 type = v16qi_ftype_v16qi_v16qi;
13001 type = v8hi_ftype_v8hi_v8hi;
13004 type = v4si_ftype_v4si_v4si;
13007 type = v2di_ftype_v2di_v2di;
13010 type = v2df_ftype_v2df_v2df;
13013 type = ti_ftype_ti_ti;
13016 type = v4sf_ftype_v4sf_v4sf;
13019 type = v8qi_ftype_v8qi_v8qi;
13022 type = v4hi_ftype_v4hi_v4hi;
13025 type = v2si_ftype_v2si_v2si;
13028 type = di_ftype_di_di;
13035 /* Override for comparisons. */
13036 if (d->icode == CODE_FOR_maskcmpv4sf3
13037 || d->icode == CODE_FOR_maskncmpv4sf3
13038 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13039 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13040 type = v4si_ftype_v4sf_v4sf;
13042 if (d->icode == CODE_FOR_maskcmpv2df3
13043 || d->icode == CODE_FOR_maskncmpv2df3
13044 || d->icode == CODE_FOR_vmmaskcmpv2df3
13045 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13046 type = v2di_ftype_v2df_v2df;
13048 def_builtin (d->mask, d->name, type, d->code);
13051 /* Add the remaining MMX insns with somewhat more complicated types. */
13052 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13053 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13054 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13055 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13056 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13058 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13059 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13060 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13062 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13063 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13065 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13066 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13068 /* comi/ucomi insns. */
13069 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13070 if (d->mask == MASK_SSE2)
13071 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13073 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13075 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13076 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13077 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13079 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13080 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13081 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13082 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13083 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13084 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13085 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13086 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13088 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13089 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13091 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13093 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13094 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13095 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13096 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13097 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13098 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13100 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13101 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13102 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13103 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13105 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13106 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13107 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13108 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13110 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13112 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13114 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13115 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13116 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13117 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13118 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13119 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13121 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13123 /* Original 3DNow! */
13124 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13125 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13126 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13127 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13128 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13129 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13130 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13131 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13132 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13133 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13134 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13135 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13136 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13137 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13138 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13139 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13140 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13141 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13142 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13143 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13145 /* 3DNow! extension as used in the Athlon CPU. */
13146 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13147 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13148 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13149 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13150 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13151 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13153 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13156 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13157 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13159 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13160 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13161 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13163 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13164 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13165 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13166 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13167 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13168 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13170 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13171 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13172 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13173 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13175 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13176 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13177 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13178 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13179 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13181 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13182 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13183 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13184 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13186 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13187 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13189 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13191 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13192 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13194 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13195 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13196 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13197 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13198 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13200 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13202 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13203 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13205 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13206 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13207 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13209 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13210 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13211 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13213 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13214 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13215 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13216 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13217 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13218 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13219 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13221 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13222 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13223 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13225 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13226 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13227 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13228 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13229 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13230 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13231 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13233 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13235 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13236 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13237 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13239 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13240 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13241 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13243 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13244 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13246 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13247 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13248 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13249 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13251 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13252 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13253 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13254 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13256 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13257 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13259 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13262 /* Errors in the source file can cause expand_expr to return const0_rtx
13263 where we expect a vector. To avoid crashing, use one of the vector
13264 clear instructions. */
13266 safe_vector_operand (x, mode)
13268 enum machine_mode mode;
13270 if (x != const0_rtx)
13272 x = gen_reg_rtx (mode);
13274 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13275 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13276 : gen_rtx_SUBREG (DImode, x, 0)));
13278 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13279 : gen_rtx_SUBREG (V4SFmode, x, 0),
13280 CONST0_RTX (V4SFmode)));
13284 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13287 ix86_expand_binop_builtin (icode, arglist, target)
13288 enum insn_code icode;
13293 tree arg0 = TREE_VALUE (arglist);
13294 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13295 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13296 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13297 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13298 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13299 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13301 if (VECTOR_MODE_P (mode0))
13302 op0 = safe_vector_operand (op0, mode0);
13303 if (VECTOR_MODE_P (mode1))
13304 op1 = safe_vector_operand (op1, mode1);
13307 || GET_MODE (target) != tmode
13308 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13309 target = gen_reg_rtx (tmode);
13311 /* In case the insn wants input operands in modes different from
13312 the result, abort. */
13313 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13316 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13317 op0 = copy_to_mode_reg (mode0, op0);
13318 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13319 op1 = copy_to_mode_reg (mode1, op1);
13321 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13322 yet one of the two must not be a memory. This is normally enforced
13323 by expanders, but we didn't bother to create one here. */
13324 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13325 op0 = copy_to_mode_reg (mode0, op0);
13327 pat = GEN_FCN (icode) (target, op0, op1);
13334 /* Subroutine of ix86_expand_builtin to take care of stores. */
13337 ix86_expand_store_builtin (icode, arglist)
13338 enum insn_code icode;
13342 tree arg0 = TREE_VALUE (arglist);
13343 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13344 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13345 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13346 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13347 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13349 if (VECTOR_MODE_P (mode1))
13350 op1 = safe_vector_operand (op1, mode1);
13352 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13354 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13355 op1 = copy_to_mode_reg (mode1, op1);
13357 pat = GEN_FCN (icode) (op0, op1);
13363 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13366 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13367 enum insn_code icode;
13373 tree arg0 = TREE_VALUE (arglist);
13374 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13375 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13376 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13379 || GET_MODE (target) != tmode
13380 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13381 target = gen_reg_rtx (tmode);
13383 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13386 if (VECTOR_MODE_P (mode0))
13387 op0 = safe_vector_operand (op0, mode0);
13389 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13390 op0 = copy_to_mode_reg (mode0, op0);
13393 pat = GEN_FCN (icode) (target, op0);
13400 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13401 sqrtss, rsqrtss, rcpss. */
13404 ix86_expand_unop1_builtin (icode, arglist, target)
13405 enum insn_code icode;
13410 tree arg0 = TREE_VALUE (arglist);
13411 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13412 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13413 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13416 || GET_MODE (target) != tmode
13417 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13418 target = gen_reg_rtx (tmode);
13420 if (VECTOR_MODE_P (mode0))
13421 op0 = safe_vector_operand (op0, mode0);
13423 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13424 op0 = copy_to_mode_reg (mode0, op0);
13427 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13428 op1 = copy_to_mode_reg (mode0, op1);
13430 pat = GEN_FCN (icode) (target, op0, op1);
13437 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13440 ix86_expand_sse_compare (d, arglist, target)
13441 const struct builtin_description *d;
13446 tree arg0 = TREE_VALUE (arglist);
13447 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13448 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13449 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13451 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13452 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13453 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13454 enum rtx_code comparison = d->comparison;
13456 if (VECTOR_MODE_P (mode0))
13457 op0 = safe_vector_operand (op0, mode0);
13458 if (VECTOR_MODE_P (mode1))
13459 op1 = safe_vector_operand (op1, mode1);
13461 /* Swap operands if we have a comparison that isn't available in
13465 rtx tmp = gen_reg_rtx (mode1);
13466 emit_move_insn (tmp, op1);
13472 || GET_MODE (target) != tmode
13473 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13474 target = gen_reg_rtx (tmode);
13476 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13477 op0 = copy_to_mode_reg (mode0, op0);
13478 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13479 op1 = copy_to_mode_reg (mode1, op1);
13481 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13482 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13489 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13492 ix86_expand_sse_comi (d, arglist, target)
13493 const struct builtin_description *d;
13498 tree arg0 = TREE_VALUE (arglist);
13499 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13500 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13501 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13503 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13504 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13505 enum rtx_code comparison = d->comparison;
13507 if (VECTOR_MODE_P (mode0))
13508 op0 = safe_vector_operand (op0, mode0);
13509 if (VECTOR_MODE_P (mode1))
13510 op1 = safe_vector_operand (op1, mode1);
13512 /* Swap operands if we have a comparison that isn't available in
13521 target = gen_reg_rtx (SImode);
13522 emit_move_insn (target, const0_rtx);
13523 target = gen_rtx_SUBREG (QImode, target, 0);
13525 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13526 op0 = copy_to_mode_reg (mode0, op0);
13527 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13528 op1 = copy_to_mode_reg (mode1, op1);
13530 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13531 pat = GEN_FCN (d->icode) (op0, op1);
13535 emit_insn (gen_rtx_SET (VOIDmode,
13536 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13537 gen_rtx_fmt_ee (comparison, QImode,
13541 return SUBREG_REG (target);
13544 /* Expand an expression EXP that calls a built-in function,
13545 with result going to TARGET if that's convenient
13546 (and in mode MODE if that's convenient).
13547 SUBTARGET may be used as the target for computing one of EXP's operands.
13548 IGNORE is nonzero if the value is to be ignored. */
13551 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13554 rtx subtarget ATTRIBUTE_UNUSED;
13555 enum machine_mode mode ATTRIBUTE_UNUSED;
13556 int ignore ATTRIBUTE_UNUSED;
13558 const struct builtin_description *d;
13560 enum insn_code icode;
13561 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13562 tree arglist = TREE_OPERAND (exp, 1);
13563 tree arg0, arg1, arg2;
13564 rtx op0, op1, op2, pat;
13565 enum machine_mode tmode, mode0, mode1, mode2;
13566 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13570 case IX86_BUILTIN_EMMS:
13571 emit_insn (gen_emms ());
13574 case IX86_BUILTIN_SFENCE:
13575 emit_insn (gen_sfence ());
13578 case IX86_BUILTIN_PEXTRW:
13579 case IX86_BUILTIN_PEXTRW128:
13580 icode = (fcode == IX86_BUILTIN_PEXTRW
13581 ? CODE_FOR_mmx_pextrw
13582 : CODE_FOR_sse2_pextrw);
13583 arg0 = TREE_VALUE (arglist);
13584 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13585 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13586 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13587 tmode = insn_data[icode].operand[0].mode;
13588 mode0 = insn_data[icode].operand[1].mode;
13589 mode1 = insn_data[icode].operand[2].mode;
13591 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13592 op0 = copy_to_mode_reg (mode0, op0);
13593 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13595 /* @@@ better error message */
13596 error ("selector must be an immediate");
13597 return gen_reg_rtx (tmode);
13600 || GET_MODE (target) != tmode
13601 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13602 target = gen_reg_rtx (tmode);
13603 pat = GEN_FCN (icode) (target, op0, op1);
13609 case IX86_BUILTIN_PINSRW:
13610 case IX86_BUILTIN_PINSRW128:
13611 icode = (fcode == IX86_BUILTIN_PINSRW
13612 ? CODE_FOR_mmx_pinsrw
13613 : CODE_FOR_sse2_pinsrw);
13614 arg0 = TREE_VALUE (arglist);
13615 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13616 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13617 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13618 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13619 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13620 tmode = insn_data[icode].operand[0].mode;
13621 mode0 = insn_data[icode].operand[1].mode;
13622 mode1 = insn_data[icode].operand[2].mode;
13623 mode2 = insn_data[icode].operand[3].mode;
13625 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13626 op0 = copy_to_mode_reg (mode0, op0);
13627 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13628 op1 = copy_to_mode_reg (mode1, op1);
13629 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13631 /* @@@ better error message */
13632 error ("selector must be an immediate");
13636 || GET_MODE (target) != tmode
13637 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13638 target = gen_reg_rtx (tmode);
13639 pat = GEN_FCN (icode) (target, op0, op1, op2);
13645 case IX86_BUILTIN_MASKMOVQ:
13646 case IX86_BUILTIN_MASKMOVDQU:
13647 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13648 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13649 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13650 : CODE_FOR_sse2_maskmovdqu));
13651 /* Note the arg order is different from the operand order. */
13652 arg1 = TREE_VALUE (arglist);
13653 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13654 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13655 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13656 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13657 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13658 mode0 = insn_data[icode].operand[0].mode;
13659 mode1 = insn_data[icode].operand[1].mode;
13660 mode2 = insn_data[icode].operand[2].mode;
13662 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13663 op0 = copy_to_mode_reg (mode0, op0);
13664 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13665 op1 = copy_to_mode_reg (mode1, op1);
13666 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13667 op2 = copy_to_mode_reg (mode2, op2);
13668 pat = GEN_FCN (icode) (op0, op1, op2);
13674 case IX86_BUILTIN_SQRTSS:
13675 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13676 case IX86_BUILTIN_RSQRTSS:
13677 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13678 case IX86_BUILTIN_RCPSS:
13679 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13681 case IX86_BUILTIN_LOADAPS:
13682 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13684 case IX86_BUILTIN_LOADUPS:
13685 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13687 case IX86_BUILTIN_STOREAPS:
13688 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13690 case IX86_BUILTIN_STOREUPS:
13691 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13693 case IX86_BUILTIN_LOADSS:
13694 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13696 case IX86_BUILTIN_STORESS:
13697 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13699 case IX86_BUILTIN_LOADHPS:
13700 case IX86_BUILTIN_LOADLPS:
13701 case IX86_BUILTIN_LOADHPD:
13702 case IX86_BUILTIN_LOADLPD:
13703 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13704 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13705 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13706 : CODE_FOR_sse2_movlpd);
13707 arg0 = TREE_VALUE (arglist);
13708 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13709 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13710 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13711 tmode = insn_data[icode].operand[0].mode;
13712 mode0 = insn_data[icode].operand[1].mode;
13713 mode1 = insn_data[icode].operand[2].mode;
13715 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13716 op0 = copy_to_mode_reg (mode0, op0);
13717 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13719 || GET_MODE (target) != tmode
13720 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13721 target = gen_reg_rtx (tmode);
13722 pat = GEN_FCN (icode) (target, op0, op1);
13728 case IX86_BUILTIN_STOREHPS:
13729 case IX86_BUILTIN_STORELPS:
13730 case IX86_BUILTIN_STOREHPD:
13731 case IX86_BUILTIN_STORELPD:
13732 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13733 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13734 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13735 : CODE_FOR_sse2_movlpd);
13736 arg0 = TREE_VALUE (arglist);
13737 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13738 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13739 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13740 mode0 = insn_data[icode].operand[1].mode;
13741 mode1 = insn_data[icode].operand[2].mode;
13743 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13744 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13745 op1 = copy_to_mode_reg (mode1, op1);
13747 pat = GEN_FCN (icode) (op0, op0, op1);
13753 case IX86_BUILTIN_MOVNTPS:
13754 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13755 case IX86_BUILTIN_MOVNTQ:
13756 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13758 case IX86_BUILTIN_LDMXCSR:
13759 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13760 target = assign_386_stack_local (SImode, 0);
13761 emit_move_insn (target, op0);
13762 emit_insn (gen_ldmxcsr (target));
13765 case IX86_BUILTIN_STMXCSR:
13766 target = assign_386_stack_local (SImode, 0);
13767 emit_insn (gen_stmxcsr (target));
13768 return copy_to_mode_reg (SImode, target);
13770 case IX86_BUILTIN_SHUFPS:
13771 case IX86_BUILTIN_SHUFPD:
13772 icode = (fcode == IX86_BUILTIN_SHUFPS
13773 ? CODE_FOR_sse_shufps
13774 : CODE_FOR_sse2_shufpd);
13775 arg0 = TREE_VALUE (arglist);
13776 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13777 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13778 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13779 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13780 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13781 tmode = insn_data[icode].operand[0].mode;
13782 mode0 = insn_data[icode].operand[1].mode;
13783 mode1 = insn_data[icode].operand[2].mode;
13784 mode2 = insn_data[icode].operand[3].mode;
13786 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13787 op0 = copy_to_mode_reg (mode0, op0);
13788 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13789 op1 = copy_to_mode_reg (mode1, op1);
13790 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13792 /* @@@ better error message */
13793 error ("mask must be an immediate");
13794 return gen_reg_rtx (tmode);
13797 || GET_MODE (target) != tmode
13798 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13799 target = gen_reg_rtx (tmode);
13800 pat = GEN_FCN (icode) (target, op0, op1, op2);
13806 case IX86_BUILTIN_PSHUFW:
13807 case IX86_BUILTIN_PSHUFD:
13808 case IX86_BUILTIN_PSHUFHW:
13809 case IX86_BUILTIN_PSHUFLW:
13810 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13811 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13812 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13813 : CODE_FOR_mmx_pshufw);
13814 arg0 = TREE_VALUE (arglist);
13815 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13816 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13817 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13818 tmode = insn_data[icode].operand[0].mode;
13819 mode1 = insn_data[icode].operand[1].mode;
13820 mode2 = insn_data[icode].operand[2].mode;
13822 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13823 op0 = copy_to_mode_reg (mode1, op0);
13824 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13826 /* @@@ better error message */
13827 error ("mask must be an immediate");
13831 || GET_MODE (target) != tmode
13832 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13833 target = gen_reg_rtx (tmode);
13834 pat = GEN_FCN (icode) (target, op0, op1);
13840 case IX86_BUILTIN_PSLLDQI128:
13841 case IX86_BUILTIN_PSRLDQI128:
13842 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13843 : CODE_FOR_sse2_lshrti3);
13844 arg0 = TREE_VALUE (arglist);
13845 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13846 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13847 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13848 tmode = insn_data[icode].operand[0].mode;
13849 mode1 = insn_data[icode].operand[1].mode;
13850 mode2 = insn_data[icode].operand[2].mode;
13852 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13854 op0 = copy_to_reg (op0);
13855 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13857 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13859 error ("shift must be an immediate");
13862 target = gen_reg_rtx (V2DImode);
13863 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13869 case IX86_BUILTIN_FEMMS:
13870 emit_insn (gen_femms ());
13873 case IX86_BUILTIN_PAVGUSB:
13874 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13876 case IX86_BUILTIN_PF2ID:
13877 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13879 case IX86_BUILTIN_PFACC:
13880 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13882 case IX86_BUILTIN_PFADD:
13883 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13885 case IX86_BUILTIN_PFCMPEQ:
13886 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13888 case IX86_BUILTIN_PFCMPGE:
13889 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13891 case IX86_BUILTIN_PFCMPGT:
13892 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13894 case IX86_BUILTIN_PFMAX:
13895 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13897 case IX86_BUILTIN_PFMIN:
13898 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13900 case IX86_BUILTIN_PFMUL:
13901 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13903 case IX86_BUILTIN_PFRCP:
13904 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13906 case IX86_BUILTIN_PFRCPIT1:
13907 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13909 case IX86_BUILTIN_PFRCPIT2:
13910 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13912 case IX86_BUILTIN_PFRSQIT1:
13913 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13915 case IX86_BUILTIN_PFRSQRT:
13916 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13918 case IX86_BUILTIN_PFSUB:
13919 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13921 case IX86_BUILTIN_PFSUBR:
13922 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13924 case IX86_BUILTIN_PI2FD:
13925 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13927 case IX86_BUILTIN_PMULHRW:
13928 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13930 case IX86_BUILTIN_PF2IW:
13931 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13933 case IX86_BUILTIN_PFNACC:
13934 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13936 case IX86_BUILTIN_PFPNACC:
13937 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13939 case IX86_BUILTIN_PI2FW:
13940 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13942 case IX86_BUILTIN_PSWAPDSI:
13943 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13945 case IX86_BUILTIN_PSWAPDSF:
13946 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13948 case IX86_BUILTIN_SSE_ZERO:
13949 target = gen_reg_rtx (V4SFmode);
13950 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13953 case IX86_BUILTIN_MMX_ZERO:
13954 target = gen_reg_rtx (DImode);
13955 emit_insn (gen_mmx_clrdi (target));
13958 case IX86_BUILTIN_CLRTI:
13959 target = gen_reg_rtx (V2DImode);
13960 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13964 case IX86_BUILTIN_SQRTSD:
13965 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13966 case IX86_BUILTIN_LOADAPD:
13967 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13968 case IX86_BUILTIN_LOADUPD:
13969 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13971 case IX86_BUILTIN_STOREAPD:
13972 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13973 case IX86_BUILTIN_STOREUPD:
13974 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13976 case IX86_BUILTIN_LOADSD:
13977 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13979 case IX86_BUILTIN_STORESD:
13980 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13982 case IX86_BUILTIN_SETPD1:
13983 target = assign_386_stack_local (DFmode, 0);
13984 arg0 = TREE_VALUE (arglist);
13985 emit_move_insn (adjust_address (target, DFmode, 0),
13986 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13987 op0 = gen_reg_rtx (V2DFmode);
13988 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13989 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13992 case IX86_BUILTIN_SETPD:
13993 target = assign_386_stack_local (V2DFmode, 0);
13994 arg0 = TREE_VALUE (arglist);
13995 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13996 emit_move_insn (adjust_address (target, DFmode, 0),
13997 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13998 emit_move_insn (adjust_address (target, DFmode, 8),
13999 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14000 op0 = gen_reg_rtx (V2DFmode);
14001 emit_insn (gen_sse2_movapd (op0, target));
14004 case IX86_BUILTIN_LOADRPD:
14005 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14006 gen_reg_rtx (V2DFmode), 1);
14007 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14010 case IX86_BUILTIN_LOADPD1:
14011 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14012 gen_reg_rtx (V2DFmode), 1);
14013 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14016 case IX86_BUILTIN_STOREPD1:
14017 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14018 case IX86_BUILTIN_STORERPD:
14019 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14021 case IX86_BUILTIN_CLRPD:
14022 target = gen_reg_rtx (V2DFmode);
14023 emit_insn (gen_sse_clrv2df (target));
14026 case IX86_BUILTIN_MFENCE:
14027 emit_insn (gen_sse2_mfence ());
14029 case IX86_BUILTIN_LFENCE:
14030 emit_insn (gen_sse2_lfence ());
14033 case IX86_BUILTIN_CLFLUSH:
14034 arg0 = TREE_VALUE (arglist);
14035 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14036 icode = CODE_FOR_sse2_clflush;
14037 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14038 op0 = copy_to_mode_reg (Pmode, op0);
14040 emit_insn (gen_sse2_clflush (op0));
14043 case IX86_BUILTIN_MOVNTPD:
14044 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14045 case IX86_BUILTIN_MOVNTDQ:
14046 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14047 case IX86_BUILTIN_MOVNTI:
14048 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14050 case IX86_BUILTIN_LOADDQA:
14051 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14052 case IX86_BUILTIN_LOADDQU:
14053 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14054 case IX86_BUILTIN_LOADD:
14055 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14057 case IX86_BUILTIN_STOREDQA:
14058 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14059 case IX86_BUILTIN_STOREDQU:
14060 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14061 case IX86_BUILTIN_STORED:
14062 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14068 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14069 if (d->code == fcode)
14071 /* Compares are treated specially. */
14072 if (d->icode == CODE_FOR_maskcmpv4sf3
14073 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14074 || d->icode == CODE_FOR_maskncmpv4sf3
14075 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14076 || d->icode == CODE_FOR_maskcmpv2df3
14077 || d->icode == CODE_FOR_vmmaskcmpv2df3
14078 || d->icode == CODE_FOR_maskncmpv2df3
14079 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14080 return ix86_expand_sse_compare (d, arglist, target);
14082 return ix86_expand_binop_builtin (d->icode, arglist, target);
14085 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14086 if (d->code == fcode)
14087 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14089 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14090 if (d->code == fcode)
14091 return ix86_expand_sse_comi (d, arglist, target);
14093 /* @@@ Should really do something sensible here. */
14097 /* Store OPERAND to the memory after reload is completed. This means
14098 that we can't easily use assign_stack_local. */
14100 ix86_force_to_memory (mode, operand)
14101 enum machine_mode mode;
14105 if (!reload_completed)
14107 if (TARGET_64BIT && TARGET_RED_ZONE)
14109 result = gen_rtx_MEM (mode,
14110 gen_rtx_PLUS (Pmode,
14112 GEN_INT (-RED_ZONE_SIZE)));
14113 emit_move_insn (result, operand);
14115 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14121 operand = gen_lowpart (DImode, operand);
14125 gen_rtx_SET (VOIDmode,
14126 gen_rtx_MEM (DImode,
14127 gen_rtx_PRE_DEC (DImode,
14128 stack_pointer_rtx)),
14134 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14143 split_di (&operand, 1, operands, operands + 1);
14145 gen_rtx_SET (VOIDmode,
14146 gen_rtx_MEM (SImode,
14147 gen_rtx_PRE_DEC (Pmode,
14148 stack_pointer_rtx)),
14151 gen_rtx_SET (VOIDmode,
14152 gen_rtx_MEM (SImode,
14153 gen_rtx_PRE_DEC (Pmode,
14154 stack_pointer_rtx)),
14159 /* It is better to store HImodes as SImodes. */
14160 if (!TARGET_PARTIAL_REG_STALL)
14161 operand = gen_lowpart (SImode, operand);
14165 gen_rtx_SET (VOIDmode,
14166 gen_rtx_MEM (GET_MODE (operand),
14167 gen_rtx_PRE_DEC (SImode,
14168 stack_pointer_rtx)),
14174 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14179 /* Free operand from the memory. */
14181 ix86_free_from_memory (mode)
14182 enum machine_mode mode;
14184 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14188 if (mode == DImode || TARGET_64BIT)
14190 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14194 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14195 to pop or add instruction if registers are available. */
14196 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14197 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14202 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14203 QImode must go into class Q_REGS.
14204 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14205 movdf to do mem-to-mem moves through integer regs. */
14207 ix86_preferred_reload_class (x, class)
14209 enum reg_class class;
14211 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14213 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14215 /* SSE can't load any constant directly yet. */
14216 if (SSE_CLASS_P (class))
14218 /* Floats can load 0 and 1. */
14219 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14221 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14222 if (MAYBE_SSE_CLASS_P (class))
14223 return (reg_class_subset_p (class, GENERAL_REGS)
14224 ? GENERAL_REGS : FLOAT_REGS);
14228 /* General regs can load everything. */
14229 if (reg_class_subset_p (class, GENERAL_REGS))
14230 return GENERAL_REGS;
14231 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14232 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14235 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14237 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14242 /* If we are copying between general and FP registers, we need a memory
14243 location. The same is true for SSE and MMX registers.
14245 The macro can't work reliably when one of the CLASSES is class containing
14246 registers from multiple units (SSE, MMX, integer). We avoid this by never
14247 combining those units in single alternative in the machine description.
14248 Ensure that this constraint holds to avoid unexpected surprises.
14250 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14251 enforce these sanity checks. */
14253 ix86_secondary_memory_needed (class1, class2, mode, strict)
14254 enum reg_class class1, class2;
14255 enum machine_mode mode;
14258 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14259 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14260 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14261 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14262 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14263 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14270 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14271 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14272 && (mode) != SImode)
14273 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14274 && (mode) != SImode));
14276 /* Return the cost of moving data from a register in class CLASS1 to
14277 one in class CLASS2.
14279 It is not required that the cost always equal 2 when FROM is the same as TO;
14280 on some machines it is expensive to move between registers if they are not
14281 general registers. */
14283 ix86_register_move_cost (mode, class1, class2)
14284 enum machine_mode mode;
14285 enum reg_class class1, class2;
14287 /* In case we require secondary memory, compute cost of the store followed
14288 by load. In order to avoid bad register allocation choices, we need
14289 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14291 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14295 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14296 MEMORY_MOVE_COST (mode, class1, 1));
14297 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14298 MEMORY_MOVE_COST (mode, class2, 1));
14300 /* In case of copying from general_purpose_register we may emit multiple
14301 stores followed by single load causing memory size mismatch stall.
14302 Count this as arbitrarily high cost of 20. */
14303 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14306 /* In the case of FP/MMX moves, the registers actually overlap, and we
14307 have to switch modes in order to treat them differently. */
14308 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14309 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14315 /* Moves between SSE/MMX and integer unit are expensive. */
14316 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14317 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14318 return ix86_cost->mmxsse_to_integer;
14319 if (MAYBE_FLOAT_CLASS_P (class1))
14320 return ix86_cost->fp_move;
14321 if (MAYBE_SSE_CLASS_P (class1))
14322 return ix86_cost->sse_move;
14323 if (MAYBE_MMX_CLASS_P (class1))
14324 return ix86_cost->mmx_move;
14328 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14330 ix86_hard_regno_mode_ok (regno, mode)
14332 enum machine_mode mode;
14334 /* Flags and only flags can only hold CCmode values. */
14335 if (CC_REGNO_P (regno))
14336 return GET_MODE_CLASS (mode) == MODE_CC;
14337 if (GET_MODE_CLASS (mode) == MODE_CC
14338 || GET_MODE_CLASS (mode) == MODE_RANDOM
14339 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14341 if (FP_REGNO_P (regno))
14342 return VALID_FP_MODE_P (mode);
14343 if (SSE_REGNO_P (regno))
14344 return VALID_SSE_REG_MODE (mode);
14345 if (MMX_REGNO_P (regno))
14346 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14347 /* We handle both integer and floats in the general purpose registers.
14348 In future we should be able to handle vector modes as well. */
14349 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14351 /* Take care for QImode values - they can be in non-QI regs, but then
14352 they do cause partial register stalls. */
14353 if (regno < 4 || mode != QImode || TARGET_64BIT)
14355 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14358 /* Return the cost of moving data of mode M between a
14359 register and memory. A value of 2 is the default; this cost is
14360 relative to those in `REGISTER_MOVE_COST'.
14362 If moving between registers and memory is more expensive than
14363 between two registers, you should define this macro to express the
14366 Model also increased moving costs of QImode registers in non
14370 ix86_memory_move_cost (mode, class, in)
14371 enum machine_mode mode;
14372 enum reg_class class;
14375 if (FLOAT_CLASS_P (class))
14393 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14395 if (SSE_CLASS_P (class))
14398 switch (GET_MODE_SIZE (mode))
14412 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14414 if (MMX_CLASS_P (class))
14417 switch (GET_MODE_SIZE (mode))
14428 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14430 switch (GET_MODE_SIZE (mode))
14434 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14435 : ix86_cost->movzbl_load);
14437 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14438 : ix86_cost->int_store[0] + 4);
14441 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14443 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14444 if (mode == TFmode)
14446 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14447 * ((int) GET_MODE_SIZE (mode)
14448 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14452 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14454 ix86_svr3_asm_out_constructor (symbol, priority)
14456 int priority ATTRIBUTE_UNUSED;
14459 fputs ("\tpushl $", asm_out_file);
14460 assemble_name (asm_out_file, XSTR (symbol, 0));
14461 fputc ('\n', asm_out_file);
14467 static int current_machopic_label_num;
14469 /* Given a symbol name and its associated stub, write out the
14470 definition of the stub. */
14473 machopic_output_stub (file, symb, stub)
14475 const char *symb, *stub;
14477 unsigned int length;
14478 char *binder_name, *symbol_name, lazy_ptr_name[32];
14479 int label = ++current_machopic_label_num;
14481 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14482 symb = (*targetm.strip_name_encoding) (symb);
14484 length = strlen (stub);
14485 binder_name = alloca (length + 32);
14486 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14488 length = strlen (symb);
14489 symbol_name = alloca (length + 32);
14490 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14492 sprintf (lazy_ptr_name, "L%d$lz", label);
14495 machopic_picsymbol_stub_section ();
14497 machopic_symbol_stub_section ();
14499 fprintf (file, "%s:\n", stub);
14500 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14504 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14505 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14506 fprintf (file, "\tjmp %%edx\n");
14509 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14511 fprintf (file, "%s:\n", binder_name);
14515 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14516 fprintf (file, "\tpushl %%eax\n");
14519 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14521 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14523 machopic_lazy_symbol_ptr_section ();
14524 fprintf (file, "%s:\n", lazy_ptr_name);
14525 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14526 fprintf (file, "\t.long %s\n", binder_name);
14528 #endif /* TARGET_MACHO */
14530 /* Order the registers for register allocator. */
14533 x86_order_regs_for_local_alloc ()
14538 /* First allocate the local general purpose registers. */
14539 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14540 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14541 reg_alloc_order [pos++] = i;
14543 /* Global general purpose registers. */
14544 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14545 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14546 reg_alloc_order [pos++] = i;
14548 /* x87 registers come first in case we are doing FP math
14550 if (!TARGET_SSE_MATH)
14551 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14552 reg_alloc_order [pos++] = i;
14554 /* SSE registers. */
14555 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14556 reg_alloc_order [pos++] = i;
14557 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14558 reg_alloc_order [pos++] = i;
14560 /* x87 registers. */
14561 if (TARGET_SSE_MATH)
14562 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14563 reg_alloc_order [pos++] = i;
14565 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14566 reg_alloc_order [pos++] = i;
14568 /* Initialize the rest of array as we do not allocate some registers
14570 while (pos < FIRST_PSEUDO_REGISTER)
14571 reg_alloc_order [pos++] = 0;
14574 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14575 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14578 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14579 struct attribute_spec.handler. */
14581 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
14584 tree args ATTRIBUTE_UNUSED;
14585 int flags ATTRIBUTE_UNUSED;
14586 bool *no_add_attrs;
14589 if (DECL_P (*node))
14591 if (TREE_CODE (*node) == TYPE_DECL)
14592 type = &TREE_TYPE (*node);
14597 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14598 || TREE_CODE (*type) == UNION_TYPE)))
14600 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14601 *no_add_attrs = true;
14604 else if ((is_attribute_p ("ms_struct", name)
14605 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14606 || ((is_attribute_p ("gcc_struct", name)
14607 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14609 warning ("`%s' incompatible attribute ignored",
14610 IDENTIFIER_POINTER (name));
14611 *no_add_attrs = true;
14618 ix86_ms_bitfield_layout_p (record_type)
14621 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14622 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14623 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14626 /* Returns an expression indicating where the this parameter is
14627 located on entry to the FUNCTION. */
14630 x86_this_parameter (function)
14633 tree type = TREE_TYPE (function);
14637 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14638 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14641 if (ix86_fntype_regparm (type) > 0)
14645 parm = TYPE_ARG_TYPES (type);
14646 /* Figure out whether or not the function has a variable number of
14648 for (; parm; parm = TREE_CHAIN (parm))
14649 if (TREE_VALUE (parm) == void_type_node)
14651 /* If not, the this parameter is in %eax. */
14653 return gen_rtx_REG (SImode, 0);
14656 if (aggregate_value_p (TREE_TYPE (type)))
14657 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14659 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14662 /* Determine whether x86_output_mi_thunk can succeed. */
14665 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14666 tree thunk ATTRIBUTE_UNUSED;
14667 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14668 HOST_WIDE_INT vcall_offset;
14671 /* 64-bit can handle anything. */
14675 /* For 32-bit, everything's fine if we have one free register. */
14676 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14679 /* Need a free register for vcall_offset. */
14683 /* Need a free register for GOT references. */
14684 if (flag_pic && !(*targetm.binds_local_p) (function))
14687 /* Otherwise ok. */
14691 /* Output the assembler code for a thunk function. THUNK_DECL is the
14692 declaration for the thunk function itself, FUNCTION is the decl for
14693 the target function. DELTA is an immediate constant offset to be
14694 added to THIS. If VCALL_OFFSET is nonzero, the word at
14695 *(*this + vcall_offset) should be added to THIS. */
14698 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14699 FILE *file ATTRIBUTE_UNUSED;
14700 tree thunk ATTRIBUTE_UNUSED;
14701 HOST_WIDE_INT delta;
14702 HOST_WIDE_INT vcall_offset;
14706 rtx this = x86_this_parameter (function);
14709 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14710 pull it in now and let DELTA benefit. */
14713 else if (vcall_offset)
14715 /* Put the this parameter into %eax. */
14717 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14718 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14721 this_reg = NULL_RTX;
14723 /* Adjust the this parameter by a fixed constant. */
14726 xops[0] = GEN_INT (delta);
14727 xops[1] = this_reg ? this_reg : this;
14730 if (!x86_64_general_operand (xops[0], DImode))
14732 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14734 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14738 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14741 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14744 /* Adjust the this parameter by a value stored in the vtable. */
14748 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14750 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14752 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14755 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14757 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14759 /* Adjust the this parameter. */
14760 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14761 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14763 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14764 xops[0] = GEN_INT (vcall_offset);
14766 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14767 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14769 xops[1] = this_reg;
14771 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14773 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14776 /* If necessary, drop THIS back to its stack slot. */
14777 if (this_reg && this_reg != this)
14779 xops[0] = this_reg;
14781 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14784 xops[0] = DECL_RTL (function);
14787 if (!flag_pic || (*targetm.binds_local_p) (function))
14788 output_asm_insn ("jmp\t%P0", xops);
14791 tmp = XEXP (xops[0], 0);
14792 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14793 tmp = gen_rtx_CONST (Pmode, tmp);
14794 tmp = gen_rtx_MEM (QImode, tmp);
14796 output_asm_insn ("jmp\t%A0", xops);
14801 if (!flag_pic || (*targetm.binds_local_p) (function))
14802 output_asm_insn ("jmp\t%P0", xops);
14804 #if defined TARGET_MACHO
14807 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
14808 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
14809 tmp = gen_rtx_MEM (QImode, tmp);
14811 output_asm_insn ("jmp\t%0", xops);
14814 #endif /* TARGET_MACHO */
14816 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14817 output_set_got (tmp);
14820 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14821 output_asm_insn ("jmp\t{*}%1", xops);
14827 x86_field_alignment (field, computed)
14831 enum machine_mode mode;
14832 tree type = TREE_TYPE (field);
14834 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14836 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14837 ? get_inner_array_type (type) : type);
14838 if (mode == DFmode || mode == DCmode
14839 || GET_MODE_CLASS (mode) == MODE_INT
14840 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14841 return MIN (32, computed);
14845 /* Output assembler code to FILE to increment profiler label # LABELNO
14846 for profiling a function entry. */
14848 x86_function_profiler (file, labelno)
14850 int labelno ATTRIBUTE_UNUSED;
14855 #ifndef NO_PROFILE_COUNTERS
14856 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14858 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14862 #ifndef NO_PROFILE_COUNTERS
14863 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14865 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14869 #ifndef NO_PROFILE_COUNTERS
14870 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14871 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14873 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14877 #ifndef NO_PROFILE_COUNTERS
14878 fprintf (file, "\tmovl\t$%sP%d,%%$%s\n", LPREFIX, labelno,
14879 PROFILE_COUNT_REGISTER);
14881 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14885 /* Implement machine specific optimizations.
14886 At the moment we implement single transformation: AMD Athlon works faster
14887 when RET is not destination of conditional jump or directly preceded
14888 by other jump instruction. We avoid the penalty by inserting NOP just
14889 before the RET instructions in such cases. */
14891 x86_machine_dependent_reorg (first)
14892 rtx first ATTRIBUTE_UNUSED;
14896 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
14898 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14900 basic_block bb = e->src;
14903 bool insert = false;
14905 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14907 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14908 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14910 if (prev && GET_CODE (prev) == CODE_LABEL)
14913 for (e = bb->pred; e; e = e->pred_next)
14914 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14915 && !(e->flags & EDGE_FALLTHRU))
14920 prev = prev_active_insn (ret);
14921 if (prev && GET_CODE (prev) == JUMP_INSN
14922 && any_condjump_p (prev))
14924 /* Empty functions get branch misspredict even when the jump destination
14925 is not visible to us. */
14926 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14930 emit_insn_before (gen_nop (), ret);
14934 /* Return nonzero when QImode register that must be represented via REX prefix
14937 x86_extended_QIreg_mentioned_p (insn)
14941 extract_insn_cached (insn);
14942 for (i = 0; i < recog_data.n_operands; i++)
14943 if (REG_P (recog_data.operand[i])
14944 && REGNO (recog_data.operand[i]) >= 4)
14949 /* Return nonzero when P points to register encoded via REX prefix.
14950 Called via for_each_rtx. */
14952 extended_reg_mentioned_1 (p, data)
14954 void *data ATTRIBUTE_UNUSED;
14956 unsigned int regno;
14959 regno = REGNO (*p);
14960 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14963 /* Return true when INSN mentions register that must be encoded using REX
14966 x86_extended_reg_mentioned_p (insn)
14969 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14972 #include "gt-i386.h"