1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
337 struct processor_costs k6_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
390 struct processor_costs athlon_cost = {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
443 struct processor_costs k8_cost = {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
496 struct processor_costs pentium4_cost = {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
549 struct processor_costs nocona_cost = {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
601 /* Generic64 should produce code tuned for Nocona and K8. */
603 struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost however our current implementation of synth_mult results in
607 use of unnecessary temporary registers causing regression on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
651 is increased to perhaps more appropriate value of 5. */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
663 struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 const struct processor_costs *ix86_cost = &pentium_cost;
717 /* Processor feature/optimization bitmasks. */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6 (1<<PROCESSOR_K6)
723 #define m_ATHLON (1<<PROCESSOR_ATHLON)
724 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
725 #define m_K8 (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
727 #define m_NOCONA (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
732 /* Generic instruction choice should be common subset of supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
735 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
736 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
737 generic because it is not working well with PPro base chips. */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_fisttp = m_NOCONA;
747 const int x86_3dnow_a = m_ATHLON_K8;
748 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
749 /* Branch hints were put in P4 based on simulation result. But
750 after P4 was made, no performance benefit was observed with
751 branch hints. It also increases the code size. As the result,
752 icc never generates branch hints. */
753 const int x86_branch_hints = 0;
754 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
755 /* We probably ought to watch for partial register stalls on Generic32
756 compilation setting as well. However in current implementation the
757 partial register stalls are not eliminated very well - they can
758 be introduced via subregs synthesized by combine and can happen
759 in caller/callee saving sequences.
760 Because this option pays back little on PPro based chips and is in conflict
761 with partial reg. dependencies used by Athlon/P4 based chips, it is better
762 to leave it off for generic32 for now. */
763 const int x86_partial_reg_stall = m_PPRO;
764 const int x86_partial_flag_reg_stall = m_GENERIC;
765 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
766 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
767 const int x86_use_mov0 = m_K6;
768 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
769 const int x86_read_modify_write = ~m_PENT;
770 const int x86_read_modify = ~(m_PENT | m_PPRO);
771 const int x86_split_long_moves = m_PPRO;
772 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
773 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
774 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
775 const int x86_qimode_math = ~(0);
776 const int x86_promote_qi_regs = 0;
777 /* On PPro this flag is meant to avoid partial register stalls. Just like
778 the x86_partial_reg_stall this option might be considered for Generic32
779 if our scheme for avoiding partial stalls was more effective. */
780 const int x86_himode_math = ~(m_PPRO);
781 const int x86_promote_hi_regs = m_PPRO;
782 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
783 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
786 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
787 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
789 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
790 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
791 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
792 const int x86_shift1 = ~m_486;
793 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
794 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
795 that thread 128bit SSE registers as single units versus K8 based chips that
796 divide SSE registers to two 64bit halves.
797 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
798 to allow register renaming on 128bit SSE units, but usually results in one
799 extra microop on 64bit SSE units. Experimental results shows that disabling
800 this option on P4 brings over 20% SPECfp regression, while enabling it on
801 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
803 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
804 /* Set for machines where the type and dependencies are resolved on SSE
805 register parts instead of whole registers, so we may maintain just
806 lower part of scalar values in proper format leaving the upper part
808 const int x86_sse_split_regs = m_ATHLON_K8;
809 const int x86_sse_typeless_stores = m_ATHLON_K8;
810 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
811 const int x86_use_ffreep = m_ATHLON_K8;
812 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
813 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
815 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
816 integer data in xmm registers. Which results in pretty abysmal code. */
817 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
819 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
820 /* Some CPU cores are not able to predict more than 4 branch instructions in
821 the 16 byte window. */
822 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
823 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
824 const int x86_use_bt = m_ATHLON_K8;
825 /* Compare and exchange was added for 80486. */
826 const int x86_cmpxchg = ~m_386;
827 /* Compare and exchange 8 bytes was added for pentium. */
828 const int x86_cmpxchg8b = ~(m_386 | m_486);
829 /* Compare and exchange 16 bytes was added for nocona. */
830 const int x86_cmpxchg16b = m_NOCONA;
831 /* Exchange and add was added for 80486. */
832 const int x86_xadd = ~m_386;
833 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
835 /* In case the average insn count for single function invocation is
836 lower than this constant, emit fast (but longer) prologue and
838 #define FAST_PROLOGUE_INSN_COUNT 20
840 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
841 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
842 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
843 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
845 /* Array of the smallest class containing reg number REGNO, indexed by
846 REGNO. Used by REGNO_REG_CLASS in i386.h. */
848 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
851 AREG, DREG, CREG, BREG,
853 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
855 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
856 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
859 /* flags, fpsr, dirflag, frame */
860 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
861 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
863 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
867 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
871 /* The "default" register map used in 32bit mode. */
873 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
875 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
876 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
877 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
878 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
879 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
881 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
884 static int const x86_64_int_parameter_registers[6] =
886 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
887 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
890 static int const x86_64_int_return_registers[4] =
892 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
895 /* The "default" register map used in 64bit mode. */
896 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
898 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
899 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
900 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
901 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
902 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
903 8,9,10,11,12,13,14,15, /* extended integer registers */
904 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
907 /* Define the register numbers to be used in Dwarf debugging information.
908 The SVR4 reference port C compiler uses the following register numbers
909 in its Dwarf output code:
910 0 for %eax (gcc regno = 0)
911 1 for %ecx (gcc regno = 2)
912 2 for %edx (gcc regno = 1)
913 3 for %ebx (gcc regno = 3)
914 4 for %esp (gcc regno = 7)
915 5 for %ebp (gcc regno = 6)
916 6 for %esi (gcc regno = 4)
917 7 for %edi (gcc regno = 5)
918 The following three DWARF register numbers are never generated by
919 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
920 believes these numbers have these meanings.
921 8 for %eip (no gcc equivalent)
922 9 for %eflags (gcc regno = 17)
923 10 for %trapno (no gcc equivalent)
924 It is not at all clear how we should number the FP stack registers
925 for the x86 architecture. If the version of SDB on x86/svr4 were
926 a bit less brain dead with respect to floating-point then we would
927 have a precedent to follow with respect to DWARF register numbers
928 for x86 FP registers, but the SDB on x86/svr4 is so completely
929 broken with respect to FP registers that it is hardly worth thinking
930 of it as something to strive for compatibility with.
931 The version of x86/svr4 SDB I have at the moment does (partially)
932 seem to believe that DWARF register number 11 is associated with
933 the x86 register %st(0), but that's about all. Higher DWARF
934 register numbers don't seem to be associated with anything in
935 particular, and even for DWARF regno 11, SDB only seems to under-
936 stand that it should say that a variable lives in %st(0) (when
937 asked via an `=' command) if we said it was in DWARF regno 11,
938 but SDB still prints garbage when asked for the value of the
939 variable in question (via a `/' command).
940 (Also note that the labels SDB prints for various FP stack regs
941 when doing an `x' command are all wrong.)
942 Note that these problems generally don't affect the native SVR4
943 C compiler because it doesn't allow the use of -O with -g and
944 because when it is *not* optimizing, it allocates a memory
945 location for each floating-point variable, and the memory
946 location is what gets described in the DWARF AT_location
947 attribute for the variable in question.
948 Regardless of the severe mental illness of the x86/svr4 SDB, we
949 do something sensible here and we use the following DWARF
950 register numbers. Note that these are all stack-top-relative
952 11 for %st(0) (gcc regno = 8)
953 12 for %st(1) (gcc regno = 9)
954 13 for %st(2) (gcc regno = 10)
955 14 for %st(3) (gcc regno = 11)
956 15 for %st(4) (gcc regno = 12)
957 16 for %st(5) (gcc regno = 13)
958 17 for %st(6) (gcc regno = 14)
959 18 for %st(7) (gcc regno = 15)
961 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
963 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
964 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
965 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
966 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
967 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
969 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
972 /* Test and compare insns in i386.md store the information needed to
973 generate branch and scc insns here. */
975 rtx ix86_compare_op0 = NULL_RTX;
976 rtx ix86_compare_op1 = NULL_RTX;
977 rtx ix86_compare_emitted = NULL_RTX;
979 /* Size of the register save area. */
980 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
982 /* Define the structure for the machine field in struct function. */
984 struct stack_local_entry GTY(())
989 struct stack_local_entry *next;
992 /* Structure describing stack frame layout.
993 Stack grows downward:
999 saved frame pointer if frame_pointer_needed
1000 <- HARD_FRAME_POINTER
1005 [va_arg registers] (
1006 > to_allocate <- FRAME_POINTER
1016 HOST_WIDE_INT frame;
1018 int outgoing_arguments_size;
1021 HOST_WIDE_INT to_allocate;
1022 /* The offsets relative to ARG_POINTER. */
1023 HOST_WIDE_INT frame_pointer_offset;
1024 HOST_WIDE_INT hard_frame_pointer_offset;
1025 HOST_WIDE_INT stack_pointer_offset;
1027 /* When save_regs_using_mov is set, emit prologue using
1028 move instead of push instructions. */
1029 bool save_regs_using_mov;
1032 /* Code model option. */
1033 enum cmodel ix86_cmodel;
1035 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1037 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1039 /* Which unit we are generating floating point math for. */
1040 enum fpmath_unit ix86_fpmath;
1042 /* Which cpu are we scheduling for. */
1043 enum processor_type ix86_tune;
1044 /* Which instruction set architecture to use. */
1045 enum processor_type ix86_arch;
1047 /* true if sse prefetch instruction is not NOOP. */
1048 int x86_prefetch_sse;
1050 /* ix86_regparm_string as a number */
1051 static int ix86_regparm;
1053 /* -mstackrealign option */
1054 extern int ix86_force_align_arg_pointer;
1055 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1057 /* Preferred alignment for stack boundary in bits. */
1058 unsigned int ix86_preferred_stack_boundary;
1060 /* Values 1-5: see jump.c */
1061 int ix86_branch_cost;
1063 /* Variables which are this size or smaller are put in the data/bss
1064 or ldata/lbss sections. */
1066 int ix86_section_threshold = 65536;
1068 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1069 char internal_label_prefix[16];
1070 int internal_label_prefix_len;
1072 static bool ix86_handle_option (size_t, const char *, int);
1073 static void output_pic_addr_const (FILE *, rtx, int);
1074 static void put_condition_code (enum rtx_code, enum machine_mode,
1076 static const char *get_some_local_dynamic_name (void);
1077 static int get_some_local_dynamic_name_1 (rtx *, void *);
1078 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1079 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1081 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1082 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1084 static rtx get_thread_pointer (int);
1085 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1086 static void get_pc_thunk_name (char [32], unsigned int);
1087 static rtx gen_push (rtx);
1088 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1089 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1090 static struct machine_function * ix86_init_machine_status (void);
1091 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1092 static int ix86_nsaved_regs (void);
1093 static void ix86_emit_save_regs (void);
1094 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1095 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1096 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1097 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1098 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1099 static rtx ix86_expand_aligntest (rtx, int);
1100 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1101 static int ix86_issue_rate (void);
1102 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1103 static int ia32_multipass_dfa_lookahead (void);
1104 static void ix86_init_mmx_sse_builtins (void);
1105 static rtx x86_this_parameter (tree);
1106 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1107 HOST_WIDE_INT, tree);
1108 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1109 static void x86_file_start (void);
1110 static void ix86_reorg (void);
1111 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1112 static tree ix86_build_builtin_va_list (void);
1113 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1115 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1116 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1117 static bool ix86_vector_mode_supported_p (enum machine_mode);
1119 static int ix86_address_cost (rtx);
1120 static bool ix86_cannot_force_const_mem (rtx);
1121 static rtx ix86_delegitimize_address (rtx);
1123 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1125 struct builtin_description;
1126 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1128 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1130 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1131 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1132 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1133 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1134 static rtx safe_vector_operand (rtx, enum machine_mode);
1135 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1136 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1137 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1139 static int ix86_fp_comparison_cost (enum rtx_code code);
1140 static unsigned int ix86_select_alt_pic_regnum (void);
1141 static int ix86_save_reg (unsigned int, int);
1142 static void ix86_compute_frame_layout (struct ix86_frame *);
1143 static int ix86_comp_type_attributes (tree, tree);
1144 static int ix86_function_regparm (tree, tree);
1145 const struct attribute_spec ix86_attribute_table[];
1146 static bool ix86_function_ok_for_sibcall (tree, tree);
1147 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1148 static int ix86_value_regno (enum machine_mode, tree, tree);
1149 static bool contains_128bit_aligned_vector_p (tree);
1150 static rtx ix86_struct_value_rtx (tree, int);
1151 static bool ix86_ms_bitfield_layout_p (tree);
1152 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1153 static int extended_reg_mentioned_1 (rtx *, void *);
1154 static bool ix86_rtx_costs (rtx, int, int, int *);
1155 static int min_insn_size (rtx);
1156 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1157 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1158 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1160 static void ix86_init_builtins (void);
1161 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1162 static const char *ix86_mangle_fundamental_type (tree);
1163 static tree ix86_stack_protect_fail (void);
1164 static rtx ix86_internal_arg_pointer (void);
1165 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1167 /* This function is only used on Solaris. */
1168 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1171 /* Register class used for passing given 64bit part of the argument.
1172 These represent classes as documented by the PS ABI, with the exception
1173 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1174 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1176 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1177 whenever possible (upper half does contain padding).
1179 enum x86_64_reg_class
1182 X86_64_INTEGER_CLASS,
1183 X86_64_INTEGERSI_CLASS,
1190 X86_64_COMPLEX_X87_CLASS,
1193 static const char * const x86_64_reg_class_name[] = {
1194 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1195 "sseup", "x87", "x87up", "cplx87", "no"
1198 #define MAX_CLASSES 4
1200 /* Table of constants used by fldpi, fldln2, etc.... */
1201 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1202 static bool ext_80387_constants_init = 0;
1203 static void init_ext_80387_constants (void);
1204 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1205 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1206 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1207 static section *x86_64_elf_select_section (tree decl, int reloc,
1208 unsigned HOST_WIDE_INT align)
1211 /* Initialize the GCC target structure. */
1212 #undef TARGET_ATTRIBUTE_TABLE
1213 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1214 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1215 # undef TARGET_MERGE_DECL_ATTRIBUTES
1216 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1219 #undef TARGET_COMP_TYPE_ATTRIBUTES
1220 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1222 #undef TARGET_INIT_BUILTINS
1223 #define TARGET_INIT_BUILTINS ix86_init_builtins
1224 #undef TARGET_EXPAND_BUILTIN
1225 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1227 #undef TARGET_ASM_FUNCTION_EPILOGUE
1228 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1230 #undef TARGET_ENCODE_SECTION_INFO
1231 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1232 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1234 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1237 #undef TARGET_ASM_OPEN_PAREN
1238 #define TARGET_ASM_OPEN_PAREN ""
1239 #undef TARGET_ASM_CLOSE_PAREN
1240 #define TARGET_ASM_CLOSE_PAREN ""
1242 #undef TARGET_ASM_ALIGNED_HI_OP
1243 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1244 #undef TARGET_ASM_ALIGNED_SI_OP
1245 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1247 #undef TARGET_ASM_ALIGNED_DI_OP
1248 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1251 #undef TARGET_ASM_UNALIGNED_HI_OP
1252 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1253 #undef TARGET_ASM_UNALIGNED_SI_OP
1254 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1255 #undef TARGET_ASM_UNALIGNED_DI_OP
1256 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1258 #undef TARGET_SCHED_ADJUST_COST
1259 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1260 #undef TARGET_SCHED_ISSUE_RATE
1261 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1262 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1263 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1264 ia32_multipass_dfa_lookahead
1266 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1267 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1270 #undef TARGET_HAVE_TLS
1271 #define TARGET_HAVE_TLS true
1273 #undef TARGET_CANNOT_FORCE_CONST_MEM
1274 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1275 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1276 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1278 #undef TARGET_DELEGITIMIZE_ADDRESS
1279 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1281 #undef TARGET_MS_BITFIELD_LAYOUT_P
1282 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1285 #undef TARGET_BINDS_LOCAL_P
1286 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1289 #undef TARGET_ASM_OUTPUT_MI_THUNK
1290 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1291 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1292 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1294 #undef TARGET_ASM_FILE_START
1295 #define TARGET_ASM_FILE_START x86_file_start
1297 #undef TARGET_DEFAULT_TARGET_FLAGS
1298 #define TARGET_DEFAULT_TARGET_FLAGS \
1300 | TARGET_64BIT_DEFAULT \
1301 | TARGET_SUBTARGET_DEFAULT \
1302 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1304 #undef TARGET_HANDLE_OPTION
1305 #define TARGET_HANDLE_OPTION ix86_handle_option
1307 #undef TARGET_RTX_COSTS
1308 #define TARGET_RTX_COSTS ix86_rtx_costs
1309 #undef TARGET_ADDRESS_COST
1310 #define TARGET_ADDRESS_COST ix86_address_cost
1312 #undef TARGET_FIXED_CONDITION_CODE_REGS
1313 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1314 #undef TARGET_CC_MODES_COMPATIBLE
1315 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1317 #undef TARGET_MACHINE_DEPENDENT_REORG
1318 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1320 #undef TARGET_BUILD_BUILTIN_VA_LIST
1321 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1323 #undef TARGET_MD_ASM_CLOBBERS
1324 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1326 #undef TARGET_PROMOTE_PROTOTYPES
1327 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1328 #undef TARGET_STRUCT_VALUE_RTX
1329 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1330 #undef TARGET_SETUP_INCOMING_VARARGS
1331 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1332 #undef TARGET_MUST_PASS_IN_STACK
1333 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1334 #undef TARGET_PASS_BY_REFERENCE
1335 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1336 #undef TARGET_INTERNAL_ARG_POINTER
1337 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1338 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1339 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1341 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1342 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1344 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1345 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1347 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1348 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1351 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1352 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1355 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1356 #undef TARGET_INSERT_ATTRIBUTES
1357 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1360 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1361 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1363 #undef TARGET_STACK_PROTECT_FAIL
1364 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1366 #undef TARGET_FUNCTION_VALUE
1367 #define TARGET_FUNCTION_VALUE ix86_function_value
1369 struct gcc_target targetm = TARGET_INITIALIZER;
1372 /* The svr4 ABI for the i386 says that records and unions are returned
1374 #ifndef DEFAULT_PCC_STRUCT_RETURN
1375 #define DEFAULT_PCC_STRUCT_RETURN 1
1378 /* Implement TARGET_HANDLE_OPTION. */
1381 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1388 target_flags &= ~MASK_3DNOW_A;
1389 target_flags_explicit |= MASK_3DNOW_A;
1396 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1397 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1404 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1405 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1412 target_flags &= ~MASK_SSE3;
1413 target_flags_explicit |= MASK_SSE3;
1422 /* Sometimes certain combinations of command options do not make
1423 sense on a particular target machine. You can define a macro
1424 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1425 defined, is executed once just after all the command options have
1428 Don't use this macro to turn on various extra optimizations for
1429 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1432 override_options (void)
1435 int ix86_tune_defaulted = 0;
1437 /* Comes from final.c -- no real reason to change it. */
1438 #define MAX_CODE_ALIGN 16
1442 const struct processor_costs *cost; /* Processor costs */
1443 const int target_enable; /* Target flags to enable. */
1444 const int target_disable; /* Target flags to disable. */
1445 const int align_loop; /* Default alignments. */
1446 const int align_loop_max_skip;
1447 const int align_jump;
1448 const int align_jump_max_skip;
1449 const int align_func;
1451 const processor_target_table[PROCESSOR_max] =
1453 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1454 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1455 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1456 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1457 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1458 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1459 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1460 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1461 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1462 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1463 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1466 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1469 const char *const name; /* processor name or nickname. */
1470 const enum processor_type processor;
1471 const enum pta_flags
1477 PTA_PREFETCH_SSE = 16,
1483 const processor_alias_table[] =
1485 {"i386", PROCESSOR_I386, 0},
1486 {"i486", PROCESSOR_I486, 0},
1487 {"i586", PROCESSOR_PENTIUM, 0},
1488 {"pentium", PROCESSOR_PENTIUM, 0},
1489 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1490 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1491 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1492 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1493 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1494 {"i686", PROCESSOR_PENTIUMPRO, 0},
1495 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1496 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1497 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1498 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1499 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1500 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1501 | PTA_MMX | PTA_PREFETCH_SSE},
1502 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1503 | PTA_MMX | PTA_PREFETCH_SSE},
1504 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1505 | PTA_MMX | PTA_PREFETCH_SSE},
1506 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1507 | PTA_MMX | PTA_PREFETCH_SSE},
1508 {"k6", PROCESSOR_K6, PTA_MMX},
1509 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1510 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1511 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1513 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1514 | PTA_3DNOW | PTA_3DNOW_A},
1515 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1516 | PTA_3DNOW_A | PTA_SSE},
1517 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1518 | PTA_3DNOW_A | PTA_SSE},
1519 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1520 | PTA_3DNOW_A | PTA_SSE},
1521 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1522 | PTA_SSE | PTA_SSE2 },
1523 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1524 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1525 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1526 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1527 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1528 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1529 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1530 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1531 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1532 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1535 int const pta_size = ARRAY_SIZE (processor_alias_table);
1537 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1538 SUBTARGET_OVERRIDE_OPTIONS;
1541 /* -fPIC is the default for x86_64. */
1542 if (TARGET_MACHO && TARGET_64BIT)
1545 /* Set the default values for switches whose default depends on TARGET_64BIT
1546 in case they weren't overwritten by command line options. */
1549 /* Mach-O doesn't support omitting the frame pointer for now. */
1550 if (flag_omit_frame_pointer == 2)
1551 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1552 if (flag_asynchronous_unwind_tables == 2)
1553 flag_asynchronous_unwind_tables = 1;
1554 if (flag_pcc_struct_return == 2)
1555 flag_pcc_struct_return = 0;
1559 if (flag_omit_frame_pointer == 2)
1560 flag_omit_frame_pointer = 0;
1561 if (flag_asynchronous_unwind_tables == 2)
1562 flag_asynchronous_unwind_tables = 0;
1563 if (flag_pcc_struct_return == 2)
1564 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1567 /* Need to check -mtune=generic first. */
1568 if (ix86_tune_string)
1570 if (!strcmp (ix86_tune_string, "generic")
1571 || !strcmp (ix86_tune_string, "i686")
1572 /* As special support for cross compilers we read -mtune=native
1573 as -mtune=generic. With native compilers we won't see the
1574 -mtune=native, as it was changed by the driver. */
1575 || !strcmp (ix86_tune_string, "native"))
1578 ix86_tune_string = "generic64";
1580 ix86_tune_string = "generic32";
1582 else if (!strncmp (ix86_tune_string, "generic", 7))
1583 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1587 if (ix86_arch_string)
1588 ix86_tune_string = ix86_arch_string;
1589 if (!ix86_tune_string)
1591 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1592 ix86_tune_defaulted = 1;
1595 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1596 need to use a sensible tune option. */
1597 if (!strcmp (ix86_tune_string, "generic")
1598 || !strcmp (ix86_tune_string, "x86-64")
1599 || !strcmp (ix86_tune_string, "i686"))
1602 ix86_tune_string = "generic64";
1604 ix86_tune_string = "generic32";
1607 if (!strcmp (ix86_tune_string, "x86-64"))
1608 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1609 "-mtune=generic instead as appropriate.");
1611 if (!ix86_arch_string)
1612 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1613 if (!strcmp (ix86_arch_string, "generic"))
1614 error ("generic CPU can be used only for -mtune= switch");
1615 if (!strncmp (ix86_arch_string, "generic", 7))
1616 error ("bad value (%s) for -march= switch", ix86_arch_string);
1618 if (ix86_cmodel_string != 0)
1620 if (!strcmp (ix86_cmodel_string, "small"))
1621 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1622 else if (!strcmp (ix86_cmodel_string, "medium"))
1623 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1625 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1626 else if (!strcmp (ix86_cmodel_string, "32"))
1627 ix86_cmodel = CM_32;
1628 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1629 ix86_cmodel = CM_KERNEL;
1630 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1631 ix86_cmodel = CM_LARGE;
1633 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1637 ix86_cmodel = CM_32;
1639 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1641 if (ix86_asm_string != 0)
1644 && !strcmp (ix86_asm_string, "intel"))
1645 ix86_asm_dialect = ASM_INTEL;
1646 else if (!strcmp (ix86_asm_string, "att"))
1647 ix86_asm_dialect = ASM_ATT;
1649 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1651 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1652 error ("code model %qs not supported in the %s bit mode",
1653 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1654 if (ix86_cmodel == CM_LARGE)
1655 sorry ("code model %<large%> not supported yet");
1656 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1657 sorry ("%i-bit mode not compiled in",
1658 (target_flags & MASK_64BIT) ? 64 : 32);
1660 for (i = 0; i < pta_size; i++)
1661 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1663 ix86_arch = processor_alias_table[i].processor;
1664 /* Default cpu tuning to the architecture. */
1665 ix86_tune = ix86_arch;
1666 if (processor_alias_table[i].flags & PTA_MMX
1667 && !(target_flags_explicit & MASK_MMX))
1668 target_flags |= MASK_MMX;
1669 if (processor_alias_table[i].flags & PTA_3DNOW
1670 && !(target_flags_explicit & MASK_3DNOW))
1671 target_flags |= MASK_3DNOW;
1672 if (processor_alias_table[i].flags & PTA_3DNOW_A
1673 && !(target_flags_explicit & MASK_3DNOW_A))
1674 target_flags |= MASK_3DNOW_A;
1675 if (processor_alias_table[i].flags & PTA_SSE
1676 && !(target_flags_explicit & MASK_SSE))
1677 target_flags |= MASK_SSE;
1678 if (processor_alias_table[i].flags & PTA_SSE2
1679 && !(target_flags_explicit & MASK_SSE2))
1680 target_flags |= MASK_SSE2;
1681 if (processor_alias_table[i].flags & PTA_SSE3
1682 && !(target_flags_explicit & MASK_SSE3))
1683 target_flags |= MASK_SSE3;
1684 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1685 x86_prefetch_sse = true;
1686 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1687 error ("CPU you selected does not support x86-64 "
1693 error ("bad value (%s) for -march= switch", ix86_arch_string);
1695 for (i = 0; i < pta_size; i++)
1696 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1698 ix86_tune = processor_alias_table[i].processor;
1699 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1701 if (ix86_tune_defaulted)
1703 ix86_tune_string = "x86-64";
1704 for (i = 0; i < pta_size; i++)
1705 if (! strcmp (ix86_tune_string,
1706 processor_alias_table[i].name))
1708 ix86_tune = processor_alias_table[i].processor;
1711 error ("CPU you selected does not support x86-64 "
1714 /* Intel CPUs have always interpreted SSE prefetch instructions as
1715 NOPs; so, we can enable SSE prefetch instructions even when
1716 -mtune (rather than -march) points us to a processor that has them.
1717 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1718 higher processors. */
1719 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1720 x86_prefetch_sse = true;
1724 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1727 ix86_cost = &size_cost;
1729 ix86_cost = processor_target_table[ix86_tune].cost;
1730 target_flags |= processor_target_table[ix86_tune].target_enable;
1731 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1733 /* Arrange to set up i386_stack_locals for all functions. */
1734 init_machine_status = ix86_init_machine_status;
1736 /* Validate -mregparm= value. */
1737 if (ix86_regparm_string)
1739 i = atoi (ix86_regparm_string);
1740 if (i < 0 || i > REGPARM_MAX)
1741 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1747 ix86_regparm = REGPARM_MAX;
1749 /* If the user has provided any of the -malign-* options,
1750 warn and use that value only if -falign-* is not set.
1751 Remove this code in GCC 3.2 or later. */
1752 if (ix86_align_loops_string)
1754 warning (0, "-malign-loops is obsolete, use -falign-loops");
1755 if (align_loops == 0)
1757 i = atoi (ix86_align_loops_string);
1758 if (i < 0 || i > MAX_CODE_ALIGN)
1759 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1761 align_loops = 1 << i;
1765 if (ix86_align_jumps_string)
1767 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1768 if (align_jumps == 0)
1770 i = atoi (ix86_align_jumps_string);
1771 if (i < 0 || i > MAX_CODE_ALIGN)
1772 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1774 align_jumps = 1 << i;
1778 if (ix86_align_funcs_string)
1780 warning (0, "-malign-functions is obsolete, use -falign-functions");
1781 if (align_functions == 0)
1783 i = atoi (ix86_align_funcs_string);
1784 if (i < 0 || i > MAX_CODE_ALIGN)
1785 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1787 align_functions = 1 << i;
1791 /* Default align_* from the processor table. */
1792 if (align_loops == 0)
1794 align_loops = processor_target_table[ix86_tune].align_loop;
1795 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1797 if (align_jumps == 0)
1799 align_jumps = processor_target_table[ix86_tune].align_jump;
1800 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1802 if (align_functions == 0)
1804 align_functions = processor_target_table[ix86_tune].align_func;
1807 /* Validate -mbranch-cost= value, or provide default. */
1808 ix86_branch_cost = ix86_cost->branch_cost;
1809 if (ix86_branch_cost_string)
1811 i = atoi (ix86_branch_cost_string);
1813 error ("-mbranch-cost=%d is not between 0 and 5", i);
1815 ix86_branch_cost = i;
1817 if (ix86_section_threshold_string)
1819 i = atoi (ix86_section_threshold_string);
1821 error ("-mlarge-data-threshold=%d is negative", i);
1823 ix86_section_threshold = i;
1826 if (ix86_tls_dialect_string)
1828 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1829 ix86_tls_dialect = TLS_DIALECT_GNU;
1830 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1831 ix86_tls_dialect = TLS_DIALECT_GNU2;
1832 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1833 ix86_tls_dialect = TLS_DIALECT_SUN;
1835 error ("bad value (%s) for -mtls-dialect= switch",
1836 ix86_tls_dialect_string);
1839 /* Keep nonleaf frame pointers. */
1840 if (flag_omit_frame_pointer)
1841 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1842 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1843 flag_omit_frame_pointer = 1;
1845 /* If we're doing fast math, we don't care about comparison order
1846 wrt NaNs. This lets us use a shorter comparison sequence. */
1847 if (flag_unsafe_math_optimizations)
1848 target_flags &= ~MASK_IEEE_FP;
1850 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1851 since the insns won't need emulation. */
1852 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1853 target_flags &= ~MASK_NO_FANCY_MATH_387;
1855 /* Likewise, if the target doesn't have a 387, or we've specified
1856 software floating point, don't use 387 inline intrinsics. */
1858 target_flags |= MASK_NO_FANCY_MATH_387;
1860 /* Turn on SSE2 builtins for -msse3. */
1862 target_flags |= MASK_SSE2;
1864 /* Turn on SSE builtins for -msse2. */
1866 target_flags |= MASK_SSE;
1868 /* Turn on MMX builtins for -msse. */
1871 target_flags |= MASK_MMX & ~target_flags_explicit;
1872 x86_prefetch_sse = true;
1875 /* Turn on MMX builtins for 3Dnow. */
1877 target_flags |= MASK_MMX;
1881 if (TARGET_ALIGN_DOUBLE)
1882 error ("-malign-double makes no sense in the 64bit mode");
1884 error ("-mrtd calling convention not supported in the 64bit mode");
1886 /* Enable by default the SSE and MMX builtins. Do allow the user to
1887 explicitly disable any of these. In particular, disabling SSE and
1888 MMX for kernel code is extremely useful. */
1890 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1891 & ~target_flags_explicit);
1895 /* i386 ABI does not specify red zone. It still makes sense to use it
1896 when programmer takes care to stack from being destroyed. */
1897 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1898 target_flags |= MASK_NO_RED_ZONE;
1901 /* Validate -mpreferred-stack-boundary= value, or provide default.
1902 The default of 128 bits is for Pentium III's SSE __m128, but we
1903 don't want additional code to keep the stack aligned when
1904 optimizing for code size. */
1905 ix86_preferred_stack_boundary
1906 = ((TARGET_MACHO || TARGET_SSE || !optimize_size) ? 128 : 32);
1907 if (ix86_preferred_stack_boundary_string)
1909 i = atoi (ix86_preferred_stack_boundary_string);
1910 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1911 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1912 TARGET_64BIT ? 4 : 2);
1914 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1917 /* Accept -msseregparm only if at least SSE support is enabled. */
1918 if (TARGET_SSEREGPARM
1920 error ("-msseregparm used without SSE enabled");
1922 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1924 if (ix86_fpmath_string != 0)
1926 if (! strcmp (ix86_fpmath_string, "387"))
1927 ix86_fpmath = FPMATH_387;
1928 else if (! strcmp (ix86_fpmath_string, "sse"))
1932 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1933 ix86_fpmath = FPMATH_387;
1936 ix86_fpmath = FPMATH_SSE;
1938 else if (! strcmp (ix86_fpmath_string, "387,sse")
1939 || ! strcmp (ix86_fpmath_string, "sse,387"))
1943 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1944 ix86_fpmath = FPMATH_387;
1946 else if (!TARGET_80387)
1948 warning (0, "387 instruction set disabled, using SSE arithmetics");
1949 ix86_fpmath = FPMATH_SSE;
1952 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1955 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1958 /* If the i387 is disabled, then do not return values in it. */
1960 target_flags &= ~MASK_FLOAT_RETURNS;
1962 if ((x86_accumulate_outgoing_args & TUNEMASK)
1963 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1965 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1967 /* ??? Unwind info is not correct around the CFG unless either a frame
1968 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1969 unwind info generation to be aware of the CFG and propagating states
1971 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1972 || flag_exceptions || flag_non_call_exceptions)
1973 && flag_omit_frame_pointer
1974 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1976 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1977 warning (0, "unwind tables currently require either a frame pointer "
1978 "or -maccumulate-outgoing-args for correctness");
1979 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1982 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1985 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1986 p = strchr (internal_label_prefix, 'X');
1987 internal_label_prefix_len = p - internal_label_prefix;
1991 /* When scheduling description is not available, disable scheduler pass
1992 so it won't slow down the compilation and make x87 code slower. */
1993 if (!TARGET_SCHEDULE)
1994 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1997 /* switch to the appropriate section for output of DECL.
1998 DECL is either a `VAR_DECL' node or a constant of some sort.
1999 RELOC indicates whether forming the initial value of DECL requires
2000 link-time relocations. */
2003 x86_64_elf_select_section (tree decl, int reloc,
2004 unsigned HOST_WIDE_INT align)
2006 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2007 && ix86_in_large_data_p (decl))
2009 const char *sname = NULL;
2010 unsigned int flags = SECTION_WRITE;
2011 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2016 case SECCAT_DATA_REL:
2017 sname = ".ldata.rel";
2019 case SECCAT_DATA_REL_LOCAL:
2020 sname = ".ldata.rel.local";
2022 case SECCAT_DATA_REL_RO:
2023 sname = ".ldata.rel.ro";
2025 case SECCAT_DATA_REL_RO_LOCAL:
2026 sname = ".ldata.rel.ro.local";
2030 flags |= SECTION_BSS;
2033 case SECCAT_RODATA_MERGE_STR:
2034 case SECCAT_RODATA_MERGE_STR_INIT:
2035 case SECCAT_RODATA_MERGE_CONST:
2039 case SECCAT_SRODATA:
2046 /* We don't split these for medium model. Place them into
2047 default sections and hope for best. */
2052 /* We might get called with string constants, but get_named_section
2053 doesn't like them as they are not DECLs. Also, we need to set
2054 flags in that case. */
2056 return get_section (sname, flags, NULL);
2057 return get_named_section (decl, sname, reloc);
2060 return default_elf_select_section (decl, reloc, align);
2063 /* Build up a unique section name, expressed as a
2064 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2065 RELOC indicates whether the initial value of EXP requires
2066 link-time relocations. */
2069 x86_64_elf_unique_section (tree decl, int reloc)
2071 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2072 && ix86_in_large_data_p (decl))
2074 const char *prefix = NULL;
2075 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2076 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2078 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2081 case SECCAT_DATA_REL:
2082 case SECCAT_DATA_REL_LOCAL:
2083 case SECCAT_DATA_REL_RO:
2084 case SECCAT_DATA_REL_RO_LOCAL:
2085 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2088 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2091 case SECCAT_RODATA_MERGE_STR:
2092 case SECCAT_RODATA_MERGE_STR_INIT:
2093 case SECCAT_RODATA_MERGE_CONST:
2094 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2096 case SECCAT_SRODATA:
2103 /* We don't split these for medium model. Place them into
2104 default sections and hope for best. */
2112 plen = strlen (prefix);
2114 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2115 name = targetm.strip_name_encoding (name);
2116 nlen = strlen (name);
2118 string = alloca (nlen + plen + 1);
2119 memcpy (string, prefix, plen);
2120 memcpy (string + plen, name, nlen + 1);
2122 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2126 default_unique_section (decl, reloc);
2129 #ifdef COMMON_ASM_OP
2130 /* This says how to output assembler code to declare an
2131 uninitialized external linkage data object.
2133 For medium model x86-64 we need to use .largecomm opcode for
2136 x86_elf_aligned_common (FILE *file,
2137 const char *name, unsigned HOST_WIDE_INT size,
2140 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2141 && size > (unsigned int)ix86_section_threshold)
2142 fprintf (file, ".largecomm\t");
2144 fprintf (file, "%s", COMMON_ASM_OP);
2145 assemble_name (file, name);
2146 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2147 size, align / BITS_PER_UNIT);
2150 /* Utility function for targets to use in implementing
2151 ASM_OUTPUT_ALIGNED_BSS. */
2154 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2155 const char *name, unsigned HOST_WIDE_INT size,
2158 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2159 && size > (unsigned int)ix86_section_threshold)
2160 switch_to_section (get_named_section (decl, ".lbss", 0));
2162 switch_to_section (bss_section);
2163 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2164 #ifdef ASM_DECLARE_OBJECT_NAME
2165 last_assemble_variable_decl = decl;
2166 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2168 /* Standard thing is just output label for the object. */
2169 ASM_OUTPUT_LABEL (file, name);
2170 #endif /* ASM_DECLARE_OBJECT_NAME */
2171 ASM_OUTPUT_SKIP (file, size ? size : 1);
2176 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2178 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2179 make the problem with not enough registers even worse. */
2180 #ifdef INSN_SCHEDULING
2182 flag_schedule_insns = 0;
2186 /* The Darwin libraries never set errno, so we might as well
2187 avoid calling them when that's the only reason we would. */
2188 flag_errno_math = 0;
2190 /* The default values of these switches depend on the TARGET_64BIT
2191 that is not known at this moment. Mark these values with 2 and
2192 let user the to override these. In case there is no command line option
2193 specifying them, we will set the defaults in override_options. */
2195 flag_omit_frame_pointer = 2;
2196 flag_pcc_struct_return = 2;
2197 flag_asynchronous_unwind_tables = 2;
2198 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2199 SUBTARGET_OPTIMIZATION_OPTIONS;
2203 /* Table of valid machine attributes. */
2204 const struct attribute_spec ix86_attribute_table[] =
2206 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2207 /* Stdcall attribute says callee is responsible for popping arguments
2208 if they are not variable. */
2209 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2210 /* Fastcall attribute says callee is responsible for popping arguments
2211 if they are not variable. */
2212 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2213 /* Cdecl attribute says the callee is a normal C declaration */
2214 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2215 /* Regparm attribute specifies how many integer arguments are to be
2216 passed in registers. */
2217 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2218 /* Sseregparm attribute says we are using x86_64 calling conventions
2219 for FP arguments. */
2220 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2221 /* force_align_arg_pointer says this function realigns the stack at entry. */
2222 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2223 false, true, true, ix86_handle_cconv_attribute },
2224 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2225 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2226 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2227 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2229 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2230 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2231 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2232 SUBTARGET_ATTRIBUTE_TABLE,
2234 { NULL, 0, 0, false, false, false, NULL }
2237 /* Decide whether we can make a sibling call to a function. DECL is the
2238 declaration of the function being targeted by the call and EXP is the
2239 CALL_EXPR representing the call. */
2242 ix86_function_ok_for_sibcall (tree decl, tree exp)
2247 /* If we are generating position-independent code, we cannot sibcall
2248 optimize any indirect call, or a direct call to a global function,
2249 as the PLT requires %ebx be live. */
2250 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2257 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2258 if (POINTER_TYPE_P (func))
2259 func = TREE_TYPE (func);
2262 /* Check that the return value locations are the same. Like
2263 if we are returning floats on the 80387 register stack, we cannot
2264 make a sibcall from a function that doesn't return a float to a
2265 function that does or, conversely, from a function that does return
2266 a float to a function that doesn't; the necessary stack adjustment
2267 would not be executed. This is also the place we notice
2268 differences in the return value ABI. Note that it is ok for one
2269 of the functions to have void return type as long as the return
2270 value of the other is passed in a register. */
2271 a = ix86_function_value (TREE_TYPE (exp), func, false);
2272 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2274 if (STACK_REG_P (a) || STACK_REG_P (b))
2276 if (!rtx_equal_p (a, b))
2279 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2281 else if (!rtx_equal_p (a, b))
2284 /* If this call is indirect, we'll need to be able to use a call-clobbered
2285 register for the address of the target function. Make sure that all
2286 such registers are not used for passing parameters. */
2287 if (!decl && !TARGET_64BIT)
2291 /* We're looking at the CALL_EXPR, we need the type of the function. */
2292 type = TREE_OPERAND (exp, 0); /* pointer expression */
2293 type = TREE_TYPE (type); /* pointer type */
2294 type = TREE_TYPE (type); /* function type */
2296 if (ix86_function_regparm (type, NULL) >= 3)
2298 /* ??? Need to count the actual number of registers to be used,
2299 not the possible number of registers. Fix later. */
2304 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2305 /* Dllimport'd functions are also called indirectly. */
2306 if (decl && DECL_DLLIMPORT_P (decl)
2307 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2311 /* If we forced aligned the stack, then sibcalling would unalign the
2312 stack, which may break the called function. */
2313 if (cfun->machine->force_align_arg_pointer)
2316 /* Otherwise okay. That also includes certain types of indirect calls. */
2320 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2321 calling convention attributes;
2322 arguments as in struct attribute_spec.handler. */
2325 ix86_handle_cconv_attribute (tree *node, tree name,
2327 int flags ATTRIBUTE_UNUSED,
2330 if (TREE_CODE (*node) != FUNCTION_TYPE
2331 && TREE_CODE (*node) != METHOD_TYPE
2332 && TREE_CODE (*node) != FIELD_DECL
2333 && TREE_CODE (*node) != TYPE_DECL)
2335 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2336 IDENTIFIER_POINTER (name));
2337 *no_add_attrs = true;
2341 /* Can combine regparm with all attributes but fastcall. */
2342 if (is_attribute_p ("regparm", name))
2346 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2348 error ("fastcall and regparm attributes are not compatible");
2351 cst = TREE_VALUE (args);
2352 if (TREE_CODE (cst) != INTEGER_CST)
2354 warning (OPT_Wattributes,
2355 "%qs attribute requires an integer constant argument",
2356 IDENTIFIER_POINTER (name));
2357 *no_add_attrs = true;
2359 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2361 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2362 IDENTIFIER_POINTER (name), REGPARM_MAX);
2363 *no_add_attrs = true;
2367 && lookup_attribute (ix86_force_align_arg_pointer_string,
2368 TYPE_ATTRIBUTES (*node))
2369 && compare_tree_int (cst, REGPARM_MAX-1))
2371 error ("%s functions limited to %d register parameters",
2372 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2380 warning (OPT_Wattributes, "%qs attribute ignored",
2381 IDENTIFIER_POINTER (name));
2382 *no_add_attrs = true;
2386 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2387 if (is_attribute_p ("fastcall", name))
2389 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2391 error ("fastcall and cdecl attributes are not compatible");
2393 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2395 error ("fastcall and stdcall attributes are not compatible");
2397 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2399 error ("fastcall and regparm attributes are not compatible");
2403 /* Can combine stdcall with fastcall (redundant), regparm and
2405 else if (is_attribute_p ("stdcall", name))
2407 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2409 error ("stdcall and cdecl attributes are not compatible");
2411 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2413 error ("stdcall and fastcall attributes are not compatible");
2417 /* Can combine cdecl with regparm and sseregparm. */
2418 else if (is_attribute_p ("cdecl", name))
2420 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2422 error ("stdcall and cdecl attributes are not compatible");
2424 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2426 error ("fastcall and cdecl attributes are not compatible");
2430 /* Can combine sseregparm with all attributes. */
2435 /* Return 0 if the attributes for two types are incompatible, 1 if they
2436 are compatible, and 2 if they are nearly compatible (which causes a
2437 warning to be generated). */
2440 ix86_comp_type_attributes (tree type1, tree type2)
2442 /* Check for mismatch of non-default calling convention. */
2443 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2445 if (TREE_CODE (type1) != FUNCTION_TYPE)
2448 /* Check for mismatched fastcall/regparm types. */
2449 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2450 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2451 || (ix86_function_regparm (type1, NULL)
2452 != ix86_function_regparm (type2, NULL)))
2455 /* Check for mismatched sseregparm types. */
2456 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2457 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2460 /* Check for mismatched return types (cdecl vs stdcall). */
2461 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2462 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2468 /* Return the regparm value for a function with the indicated TYPE and DECL.
2469 DECL may be NULL when calling function indirectly
2470 or considering a libcall. */
2473 ix86_function_regparm (tree type, tree decl)
2476 int regparm = ix86_regparm;
2477 bool user_convention = false;
2481 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2484 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2485 user_convention = true;
2488 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2491 user_convention = true;
2494 /* Use register calling convention for local functions when possible. */
2495 if (!TARGET_64BIT && !user_convention && decl
2496 && flag_unit_at_a_time && !profile_flag)
2498 struct cgraph_local_info *i = cgraph_local_info (decl);
2501 int local_regparm, globals = 0, regno;
2503 /* Make sure no regparm register is taken by a global register
2505 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2506 if (global_regs[local_regparm])
2508 /* We can't use regparm(3) for nested functions as these use
2509 static chain pointer in third argument. */
2510 if (local_regparm == 3
2511 && decl_function_context (decl)
2512 && !DECL_NO_STATIC_CHAIN (decl))
2514 /* If the function realigns its stackpointer, the
2515 prologue will clobber %ecx. If we've already
2516 generated code for the callee, the callee
2517 DECL_STRUCT_FUNCTION is gone, so we fall back to
2518 scanning the attributes for the self-realigning
2520 if ((DECL_STRUCT_FUNCTION (decl)
2521 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2522 || (!DECL_STRUCT_FUNCTION (decl)
2523 && lookup_attribute (ix86_force_align_arg_pointer_string,
2524 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2526 /* Each global register variable increases register preassure,
2527 so the more global reg vars there are, the smaller regparm
2528 optimization use, unless requested by the user explicitly. */
2529 for (regno = 0; regno < 6; regno++)
2530 if (global_regs[regno])
2533 = globals < local_regparm ? local_regparm - globals : 0;
2535 if (local_regparm > regparm)
2536 regparm = local_regparm;
2543 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2544 in SSE registers for a function with the indicated TYPE and DECL.
2545 DECL may be NULL when calling function indirectly
2546 or considering a libcall. Otherwise return 0. */
2549 ix86_function_sseregparm (tree type, tree decl)
2551 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2552 by the sseregparm attribute. */
2553 if (TARGET_SSEREGPARM
2555 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2560 error ("Calling %qD with attribute sseregparm without "
2561 "SSE/SSE2 enabled", decl);
2563 error ("Calling %qT with attribute sseregparm without "
2564 "SSE/SSE2 enabled", type);
2571 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2572 in SSE registers even for 32-bit mode and not just 3, but up to
2573 8 SSE arguments in registers. */
2574 if (!TARGET_64BIT && decl
2575 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2577 struct cgraph_local_info *i = cgraph_local_info (decl);
2579 return TARGET_SSE2 ? 2 : 1;
2585 /* Return true if EAX is live at the start of the function. Used by
2586 ix86_expand_prologue to determine if we need special help before
2587 calling allocate_stack_worker. */
2590 ix86_eax_live_at_start_p (void)
2592 /* Cheat. Don't bother working forward from ix86_function_regparm
2593 to the function type to whether an actual argument is located in
2594 eax. Instead just look at cfg info, which is still close enough
2595 to correct at this point. This gives false positives for broken
2596 functions that might use uninitialized data that happens to be
2597 allocated in eax, but who cares? */
2598 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2601 /* Value is the number of bytes of arguments automatically
2602 popped when returning from a subroutine call.
2603 FUNDECL is the declaration node of the function (as a tree),
2604 FUNTYPE is the data type of the function (as a tree),
2605 or for a library call it is an identifier node for the subroutine name.
2606 SIZE is the number of bytes of arguments passed on the stack.
2608 On the 80386, the RTD insn may be used to pop them if the number
2609 of args is fixed, but if the number is variable then the caller
2610 must pop them all. RTD can't be used for library calls now
2611 because the library is compiled with the Unix compiler.
2612 Use of RTD is a selectable option, since it is incompatible with
2613 standard Unix calling sequences. If the option is not selected,
2614 the caller must always pop the args.
2616 The attribute stdcall is equivalent to RTD on a per module basis. */
2619 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2621 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2623 /* Cdecl functions override -mrtd, and never pop the stack. */
2624 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2626 /* Stdcall and fastcall functions will pop the stack if not
2628 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2629 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2633 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2634 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2635 == void_type_node)))
2639 /* Lose any fake structure return argument if it is passed on the stack. */
2640 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2642 && !KEEP_AGGREGATE_RETURN_POINTER)
2644 int nregs = ix86_function_regparm (funtype, fundecl);
2647 return GET_MODE_SIZE (Pmode);
2653 /* Argument support functions. */
2655 /* Return true when register may be used to pass function parameters. */
2657 ix86_function_arg_regno_p (int regno)
2661 return (regno < REGPARM_MAX
2662 || (TARGET_MMX && MMX_REGNO_P (regno)
2663 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2664 || (TARGET_SSE && SSE_REGNO_P (regno)
2665 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2667 if (TARGET_SSE && SSE_REGNO_P (regno)
2668 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2670 /* RAX is used as hidden argument to va_arg functions. */
2673 for (i = 0; i < REGPARM_MAX; i++)
2674 if (regno == x86_64_int_parameter_registers[i])
2679 /* Return if we do not know how to pass TYPE solely in registers. */
2682 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2684 if (must_pass_in_stack_var_size_or_pad (mode, type))
2687 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2688 The layout_type routine is crafty and tries to trick us into passing
2689 currently unsupported vector types on the stack by using TImode. */
2690 return (!TARGET_64BIT && mode == TImode
2691 && type && TREE_CODE (type) != VECTOR_TYPE);
2694 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2695 for a call to a function whose data type is FNTYPE.
2696 For a library call, FNTYPE is 0. */
2699 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2700 tree fntype, /* tree ptr for function decl */
2701 rtx libname, /* SYMBOL_REF of library name or 0 */
2704 static CUMULATIVE_ARGS zero_cum;
2705 tree param, next_param;
2707 if (TARGET_DEBUG_ARG)
2709 fprintf (stderr, "\ninit_cumulative_args (");
2711 fprintf (stderr, "fntype code = %s, ret code = %s",
2712 tree_code_name[(int) TREE_CODE (fntype)],
2713 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2715 fprintf (stderr, "no fntype");
2718 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2723 /* Set up the number of registers to use for passing arguments. */
2724 cum->nregs = ix86_regparm;
2726 cum->sse_nregs = SSE_REGPARM_MAX;
2728 cum->mmx_nregs = MMX_REGPARM_MAX;
2729 cum->warn_sse = true;
2730 cum->warn_mmx = true;
2731 cum->maybe_vaarg = false;
2733 /* Use ecx and edx registers if function has fastcall attribute,
2734 else look for regparm information. */
2735 if (fntype && !TARGET_64BIT)
2737 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2743 cum->nregs = ix86_function_regparm (fntype, fndecl);
2746 /* Set up the number of SSE registers used for passing SFmode
2747 and DFmode arguments. Warn for mismatching ABI. */
2748 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2750 /* Determine if this function has variable arguments. This is
2751 indicated by the last argument being 'void_type_mode' if there
2752 are no variable arguments. If there are variable arguments, then
2753 we won't pass anything in registers in 32-bit mode. */
2755 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2757 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2758 param != 0; param = next_param)
2760 next_param = TREE_CHAIN (param);
2761 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2771 cum->float_in_sse = 0;
2773 cum->maybe_vaarg = true;
2777 if ((!fntype && !libname)
2778 || (fntype && !TYPE_ARG_TYPES (fntype)))
2779 cum->maybe_vaarg = true;
2781 if (TARGET_DEBUG_ARG)
2782 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2787 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2788 But in the case of vector types, it is some vector mode.
2790 When we have only some of our vector isa extensions enabled, then there
2791 are some modes for which vector_mode_supported_p is false. For these
2792 modes, the generic vector support in gcc will choose some non-vector mode
2793 in order to implement the type. By computing the natural mode, we'll
2794 select the proper ABI location for the operand and not depend on whatever
2795 the middle-end decides to do with these vector types. */
2797 static enum machine_mode
2798 type_natural_mode (tree type)
2800 enum machine_mode mode = TYPE_MODE (type);
2802 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2804 HOST_WIDE_INT size = int_size_in_bytes (type);
2805 if ((size == 8 || size == 16)
2806 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2807 && TYPE_VECTOR_SUBPARTS (type) > 1)
2809 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2811 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2812 mode = MIN_MODE_VECTOR_FLOAT;
2814 mode = MIN_MODE_VECTOR_INT;
2816 /* Get the mode which has this inner mode and number of units. */
2817 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2818 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2819 && GET_MODE_INNER (mode) == innermode)
2829 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2830 this may not agree with the mode that the type system has chosen for the
2831 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2832 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2835 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2840 if (orig_mode != BLKmode)
2841 tmp = gen_rtx_REG (orig_mode, regno);
2844 tmp = gen_rtx_REG (mode, regno);
2845 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2846 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2852 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2853 of this code is to classify each 8bytes of incoming argument by the register
2854 class and assign registers accordingly. */
2856 /* Return the union class of CLASS1 and CLASS2.
2857 See the x86-64 PS ABI for details. */
2859 static enum x86_64_reg_class
2860 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2862 /* Rule #1: If both classes are equal, this is the resulting class. */
2863 if (class1 == class2)
2866 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2868 if (class1 == X86_64_NO_CLASS)
2870 if (class2 == X86_64_NO_CLASS)
2873 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2874 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2875 return X86_64_MEMORY_CLASS;
2877 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2878 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2879 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2880 return X86_64_INTEGERSI_CLASS;
2881 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2882 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2883 return X86_64_INTEGER_CLASS;
2885 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2887 if (class1 == X86_64_X87_CLASS
2888 || class1 == X86_64_X87UP_CLASS
2889 || class1 == X86_64_COMPLEX_X87_CLASS
2890 || class2 == X86_64_X87_CLASS
2891 || class2 == X86_64_X87UP_CLASS
2892 || class2 == X86_64_COMPLEX_X87_CLASS)
2893 return X86_64_MEMORY_CLASS;
2895 /* Rule #6: Otherwise class SSE is used. */
2896 return X86_64_SSE_CLASS;
2899 /* Classify the argument of type TYPE and mode MODE.
2900 CLASSES will be filled by the register class used to pass each word
2901 of the operand. The number of words is returned. In case the parameter
2902 should be passed in memory, 0 is returned. As a special case for zero
2903 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2905 BIT_OFFSET is used internally for handling records and specifies offset
2906 of the offset in bits modulo 256 to avoid overflow cases.
2908 See the x86-64 PS ABI for details.
2912 classify_argument (enum machine_mode mode, tree type,
2913 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2915 HOST_WIDE_INT bytes =
2916 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2917 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2919 /* Variable sized entities are always passed/returned in memory. */
2923 if (mode != VOIDmode
2924 && targetm.calls.must_pass_in_stack (mode, type))
2927 if (type && AGGREGATE_TYPE_P (type))
2931 enum x86_64_reg_class subclasses[MAX_CLASSES];
2933 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2937 for (i = 0; i < words; i++)
2938 classes[i] = X86_64_NO_CLASS;
2940 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2941 signalize memory class, so handle it as special case. */
2944 classes[0] = X86_64_NO_CLASS;
2948 /* Classify each field of record and merge classes. */
2949 switch (TREE_CODE (type))
2952 /* For classes first merge in the field of the subclasses. */
2953 if (TYPE_BINFO (type))
2955 tree binfo, base_binfo;
2958 for (binfo = TYPE_BINFO (type), basenum = 0;
2959 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2962 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2963 tree type = BINFO_TYPE (base_binfo);
2965 num = classify_argument (TYPE_MODE (type),
2967 (offset + bit_offset) % 256);
2970 for (i = 0; i < num; i++)
2972 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2974 merge_classes (subclasses[i], classes[i + pos]);
2978 /* And now merge the fields of structure. */
2979 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2981 if (TREE_CODE (field) == FIELD_DECL)
2985 if (TREE_TYPE (field) == error_mark_node)
2988 /* Bitfields are always classified as integer. Handle them
2989 early, since later code would consider them to be
2990 misaligned integers. */
2991 if (DECL_BIT_FIELD (field))
2993 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2994 i < ((int_bit_position (field) + (bit_offset % 64))
2995 + tree_low_cst (DECL_SIZE (field), 0)
2998 merge_classes (X86_64_INTEGER_CLASS,
3003 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3004 TREE_TYPE (field), subclasses,
3005 (int_bit_position (field)
3006 + bit_offset) % 256);
3009 for (i = 0; i < num; i++)
3012 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3014 merge_classes (subclasses[i], classes[i + pos]);
3022 /* Arrays are handled as small records. */
3025 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3026 TREE_TYPE (type), subclasses, bit_offset);
3030 /* The partial classes are now full classes. */
3031 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3032 subclasses[0] = X86_64_SSE_CLASS;
3033 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3034 subclasses[0] = X86_64_INTEGER_CLASS;
3036 for (i = 0; i < words; i++)
3037 classes[i] = subclasses[i % num];
3042 case QUAL_UNION_TYPE:
3043 /* Unions are similar to RECORD_TYPE but offset is always 0.
3046 /* Unions are not derived. */
3047 gcc_assert (!TYPE_BINFO (type)
3048 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3049 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3051 if (TREE_CODE (field) == FIELD_DECL)
3055 if (TREE_TYPE (field) == error_mark_node)
3058 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3059 TREE_TYPE (field), subclasses,
3063 for (i = 0; i < num; i++)
3064 classes[i] = merge_classes (subclasses[i], classes[i]);
3073 /* Final merger cleanup. */
3074 for (i = 0; i < words; i++)
3076 /* If one class is MEMORY, everything should be passed in
3078 if (classes[i] == X86_64_MEMORY_CLASS)
3081 /* The X86_64_SSEUP_CLASS should be always preceded by
3082 X86_64_SSE_CLASS. */
3083 if (classes[i] == X86_64_SSEUP_CLASS
3084 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3085 classes[i] = X86_64_SSE_CLASS;
3087 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3088 if (classes[i] == X86_64_X87UP_CLASS
3089 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3090 classes[i] = X86_64_SSE_CLASS;
3095 /* Compute alignment needed. We align all types to natural boundaries with
3096 exception of XFmode that is aligned to 64bits. */
3097 if (mode != VOIDmode && mode != BLKmode)
3099 int mode_alignment = GET_MODE_BITSIZE (mode);
3102 mode_alignment = 128;
3103 else if (mode == XCmode)
3104 mode_alignment = 256;
3105 if (COMPLEX_MODE_P (mode))
3106 mode_alignment /= 2;
3107 /* Misaligned fields are always returned in memory. */
3108 if (bit_offset % mode_alignment)
3112 /* for V1xx modes, just use the base mode */
3113 if (VECTOR_MODE_P (mode)
3114 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3115 mode = GET_MODE_INNER (mode);
3117 /* Classification of atomic types. */
3122 classes[0] = X86_64_SSE_CLASS;
3125 classes[0] = X86_64_SSE_CLASS;
3126 classes[1] = X86_64_SSEUP_CLASS;
3135 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3136 classes[0] = X86_64_INTEGERSI_CLASS;
3138 classes[0] = X86_64_INTEGER_CLASS;
3142 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3147 if (!(bit_offset % 64))
3148 classes[0] = X86_64_SSESF_CLASS;
3150 classes[0] = X86_64_SSE_CLASS;
3153 classes[0] = X86_64_SSEDF_CLASS;
3156 classes[0] = X86_64_X87_CLASS;
3157 classes[1] = X86_64_X87UP_CLASS;
3160 classes[0] = X86_64_SSE_CLASS;
3161 classes[1] = X86_64_SSEUP_CLASS;
3164 classes[0] = X86_64_SSE_CLASS;
3167 classes[0] = X86_64_SSEDF_CLASS;
3168 classes[1] = X86_64_SSEDF_CLASS;
3171 classes[0] = X86_64_COMPLEX_X87_CLASS;
3174 /* This modes is larger than 16 bytes. */
3182 classes[0] = X86_64_SSE_CLASS;
3183 classes[1] = X86_64_SSEUP_CLASS;
3189 classes[0] = X86_64_SSE_CLASS;
3195 gcc_assert (VECTOR_MODE_P (mode));
3200 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3202 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3203 classes[0] = X86_64_INTEGERSI_CLASS;
3205 classes[0] = X86_64_INTEGER_CLASS;
3206 classes[1] = X86_64_INTEGER_CLASS;
3207 return 1 + (bytes > 8);
3211 /* Examine the argument and return set number of register required in each
3212 class. Return 0 iff parameter should be passed in memory. */
3214 examine_argument (enum machine_mode mode, tree type, int in_return,
3215 int *int_nregs, int *sse_nregs)
3217 enum x86_64_reg_class class[MAX_CLASSES];
3218 int n = classify_argument (mode, type, class, 0);
3224 for (n--; n >= 0; n--)
3227 case X86_64_INTEGER_CLASS:
3228 case X86_64_INTEGERSI_CLASS:
3231 case X86_64_SSE_CLASS:
3232 case X86_64_SSESF_CLASS:
3233 case X86_64_SSEDF_CLASS:
3236 case X86_64_NO_CLASS:
3237 case X86_64_SSEUP_CLASS:
3239 case X86_64_X87_CLASS:
3240 case X86_64_X87UP_CLASS:
3244 case X86_64_COMPLEX_X87_CLASS:
3245 return in_return ? 2 : 0;
3246 case X86_64_MEMORY_CLASS:
3252 /* Construct container for the argument used by GCC interface. See
3253 FUNCTION_ARG for the detailed description. */
3256 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3257 tree type, int in_return, int nintregs, int nsseregs,
3258 const int *intreg, int sse_regno)
3260 /* The following variables hold the static issued_error state. */
3261 static bool issued_sse_arg_error;
3262 static bool issued_sse_ret_error;
3263 static bool issued_x87_ret_error;
3265 enum machine_mode tmpmode;
3267 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3268 enum x86_64_reg_class class[MAX_CLASSES];
3272 int needed_sseregs, needed_intregs;
3273 rtx exp[MAX_CLASSES];
3276 n = classify_argument (mode, type, class, 0);
3277 if (TARGET_DEBUG_ARG)
3280 fprintf (stderr, "Memory class\n");
3283 fprintf (stderr, "Classes:");
3284 for (i = 0; i < n; i++)
3286 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3288 fprintf (stderr, "\n");
3293 if (!examine_argument (mode, type, in_return, &needed_intregs,
3296 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3299 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3300 some less clueful developer tries to use floating-point anyway. */
3301 if (needed_sseregs && !TARGET_SSE)
3305 if (!issued_sse_ret_error)
3307 error ("SSE register return with SSE disabled");
3308 issued_sse_ret_error = true;
3311 else if (!issued_sse_arg_error)
3313 error ("SSE register argument with SSE disabled");
3314 issued_sse_arg_error = true;
3319 /* Likewise, error if the ABI requires us to return values in the
3320 x87 registers and the user specified -mno-80387. */
3321 if (!TARGET_80387 && in_return)
3322 for (i = 0; i < n; i++)
3323 if (class[i] == X86_64_X87_CLASS
3324 || class[i] == X86_64_X87UP_CLASS
3325 || class[i] == X86_64_COMPLEX_X87_CLASS)
3327 if (!issued_x87_ret_error)
3329 error ("x87 register return with x87 disabled");
3330 issued_x87_ret_error = true;
3335 /* First construct simple cases. Avoid SCmode, since we want to use
3336 single register to pass this type. */
3337 if (n == 1 && mode != SCmode)
3340 case X86_64_INTEGER_CLASS:
3341 case X86_64_INTEGERSI_CLASS:
3342 return gen_rtx_REG (mode, intreg[0]);
3343 case X86_64_SSE_CLASS:
3344 case X86_64_SSESF_CLASS:
3345 case X86_64_SSEDF_CLASS:
3346 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3347 case X86_64_X87_CLASS:
3348 case X86_64_COMPLEX_X87_CLASS:
3349 return gen_rtx_REG (mode, FIRST_STACK_REG);
3350 case X86_64_NO_CLASS:
3351 /* Zero sized array, struct or class. */
3356 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3358 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3360 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3361 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3362 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3363 && class[1] == X86_64_INTEGER_CLASS
3364 && (mode == CDImode || mode == TImode || mode == TFmode)
3365 && intreg[0] + 1 == intreg[1])
3366 return gen_rtx_REG (mode, intreg[0]);
3368 /* Otherwise figure out the entries of the PARALLEL. */
3369 for (i = 0; i < n; i++)
3373 case X86_64_NO_CLASS:
3375 case X86_64_INTEGER_CLASS:
3376 case X86_64_INTEGERSI_CLASS:
3377 /* Merge TImodes on aligned occasions here too. */
3378 if (i * 8 + 8 > bytes)
3379 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3380 else if (class[i] == X86_64_INTEGERSI_CLASS)
3384 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3385 if (tmpmode == BLKmode)
3387 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3388 gen_rtx_REG (tmpmode, *intreg),
3392 case X86_64_SSESF_CLASS:
3393 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3394 gen_rtx_REG (SFmode,
3395 SSE_REGNO (sse_regno)),
3399 case X86_64_SSEDF_CLASS:
3400 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3401 gen_rtx_REG (DFmode,
3402 SSE_REGNO (sse_regno)),
3406 case X86_64_SSE_CLASS:
3407 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3411 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3412 gen_rtx_REG (tmpmode,
3413 SSE_REGNO (sse_regno)),
3415 if (tmpmode == TImode)
3424 /* Empty aligned struct, union or class. */
3428 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3429 for (i = 0; i < nexps; i++)
3430 XVECEXP (ret, 0, i) = exp [i];
3434 /* Update the data in CUM to advance over an argument
3435 of mode MODE and data type TYPE.
3436 (TYPE is null for libcalls where that information may not be available.) */
3439 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3440 tree type, int named)
3443 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3444 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3447 mode = type_natural_mode (type);
3449 if (TARGET_DEBUG_ARG)
3450 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3451 "mode=%s, named=%d)\n\n",
3452 words, cum->words, cum->nregs, cum->sse_nregs,
3453 GET_MODE_NAME (mode), named);
3457 int int_nregs, sse_nregs;
3458 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3459 cum->words += words;
3460 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3462 cum->nregs -= int_nregs;
3463 cum->sse_nregs -= sse_nregs;
3464 cum->regno += int_nregs;
3465 cum->sse_regno += sse_nregs;
3468 cum->words += words;
3486 cum->words += words;
3487 cum->nregs -= words;
3488 cum->regno += words;
3490 if (cum->nregs <= 0)
3498 if (cum->float_in_sse < 2)
3501 if (cum->float_in_sse < 1)
3512 if (!type || !AGGREGATE_TYPE_P (type))
3514 cum->sse_words += words;
3515 cum->sse_nregs -= 1;
3516 cum->sse_regno += 1;
3517 if (cum->sse_nregs <= 0)
3529 if (!type || !AGGREGATE_TYPE_P (type))
3531 cum->mmx_words += words;
3532 cum->mmx_nregs -= 1;
3533 cum->mmx_regno += 1;
3534 if (cum->mmx_nregs <= 0)
3545 /* Define where to put the arguments to a function.
3546 Value is zero to push the argument on the stack,
3547 or a hard register in which to store the argument.
3549 MODE is the argument's machine mode.
3550 TYPE is the data type of the argument (as a tree).
3551 This is null for libcalls where that information may
3553 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3554 the preceding args and about the function being called.
3555 NAMED is nonzero if this argument is a named parameter
3556 (otherwise it is an extra parameter matching an ellipsis). */
3559 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3560 tree type, int named)
3562 enum machine_mode mode = orig_mode;
3565 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3566 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3567 static bool warnedsse, warnedmmx;
3569 /* To simplify the code below, represent vector types with a vector mode
3570 even if MMX/SSE are not active. */
3571 if (type && TREE_CODE (type) == VECTOR_TYPE)
3572 mode = type_natural_mode (type);
3574 /* Handle a hidden AL argument containing number of registers for varargs
3575 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3577 if (mode == VOIDmode)
3580 return GEN_INT (cum->maybe_vaarg
3581 ? (cum->sse_nregs < 0
3589 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3591 &x86_64_int_parameter_registers [cum->regno],
3596 /* For now, pass fp/complex values on the stack. */
3608 if (words <= cum->nregs)
3610 int regno = cum->regno;
3612 /* Fastcall allocates the first two DWORD (SImode) or
3613 smaller arguments to ECX and EDX. */
3616 if (mode == BLKmode || mode == DImode)
3619 /* ECX not EAX is the first allocated register. */
3623 ret = gen_rtx_REG (mode, regno);
3627 if (cum->float_in_sse < 2)
3630 if (cum->float_in_sse < 1)
3640 if (!type || !AGGREGATE_TYPE_P (type))
3642 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3645 warning (0, "SSE vector argument without SSE enabled "
3649 ret = gen_reg_or_parallel (mode, orig_mode,
3650 cum->sse_regno + FIRST_SSE_REG);
3657 if (!type || !AGGREGATE_TYPE_P (type))
3659 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3662 warning (0, "MMX vector argument without MMX enabled "
3666 ret = gen_reg_or_parallel (mode, orig_mode,
3667 cum->mmx_regno + FIRST_MMX_REG);
3672 if (TARGET_DEBUG_ARG)
3675 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3676 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3679 print_simple_rtl (stderr, ret);
3681 fprintf (stderr, ", stack");
3683 fprintf (stderr, " )\n");
3689 /* A C expression that indicates when an argument must be passed by
3690 reference. If nonzero for an argument, a copy of that argument is
3691 made in memory and a pointer to the argument is passed instead of
3692 the argument itself. The pointer is passed in whatever way is
3693 appropriate for passing a pointer to that type. */
3696 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3697 enum machine_mode mode ATTRIBUTE_UNUSED,
3698 tree type, bool named ATTRIBUTE_UNUSED)
3703 if (type && int_size_in_bytes (type) == -1)
3705 if (TARGET_DEBUG_ARG)
3706 fprintf (stderr, "function_arg_pass_by_reference\n");
3713 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3714 ABI. Only called if TARGET_SSE. */
3716 contains_128bit_aligned_vector_p (tree type)
3718 enum machine_mode mode = TYPE_MODE (type);
3719 if (SSE_REG_MODE_P (mode)
3720 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3722 if (TYPE_ALIGN (type) < 128)
3725 if (AGGREGATE_TYPE_P (type))
3727 /* Walk the aggregates recursively. */
3728 switch (TREE_CODE (type))
3732 case QUAL_UNION_TYPE:
3736 if (TYPE_BINFO (type))
3738 tree binfo, base_binfo;
3741 for (binfo = TYPE_BINFO (type), i = 0;
3742 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3743 if (contains_128bit_aligned_vector_p
3744 (BINFO_TYPE (base_binfo)))
3747 /* And now merge the fields of structure. */
3748 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3750 if (TREE_CODE (field) == FIELD_DECL
3751 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3758 /* Just for use if some languages passes arrays by value. */
3759 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3770 /* Gives the alignment boundary, in bits, of an argument with the
3771 specified mode and type. */
3774 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3778 align = TYPE_ALIGN (type);
3780 align = GET_MODE_ALIGNMENT (mode);
3781 if (align < PARM_BOUNDARY)
3782 align = PARM_BOUNDARY;
3785 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3786 make an exception for SSE modes since these require 128bit
3789 The handling here differs from field_alignment. ICC aligns MMX
3790 arguments to 4 byte boundaries, while structure fields are aligned
3791 to 8 byte boundaries. */
3793 align = PARM_BOUNDARY;
3796 if (!SSE_REG_MODE_P (mode))
3797 align = PARM_BOUNDARY;
3801 if (!contains_128bit_aligned_vector_p (type))
3802 align = PARM_BOUNDARY;
3810 /* Return true if N is a possible register number of function value. */
3812 ix86_function_value_regno_p (int regno)
3815 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3816 || (regno == FIRST_SSE_REG && TARGET_SSE))
3820 && (regno == FIRST_MMX_REG && TARGET_MMX))
3826 /* Define how to find the value returned by a function.
3827 VALTYPE is the data type of the value (as a tree).
3828 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3829 otherwise, FUNC is 0. */
3831 ix86_function_value (tree valtype, tree fntype_or_decl,
3832 bool outgoing ATTRIBUTE_UNUSED)
3834 enum machine_mode natmode = type_natural_mode (valtype);
3838 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3839 1, REGPARM_MAX, SSE_REGPARM_MAX,
3840 x86_64_int_return_registers, 0);
3841 /* For zero sized structures, construct_container return NULL, but we
3842 need to keep rest of compiler happy by returning meaningful value. */
3844 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3849 tree fn = NULL_TREE, fntype;
3851 && DECL_P (fntype_or_decl))
3852 fn = fntype_or_decl;
3853 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3854 return gen_rtx_REG (TYPE_MODE (valtype),
3855 ix86_value_regno (natmode, fn, fntype));
3859 /* Return true iff type is returned in memory. */
3861 ix86_return_in_memory (tree type)
3863 int needed_intregs, needed_sseregs, size;
3864 enum machine_mode mode = type_natural_mode (type);
3867 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3869 if (mode == BLKmode)
3872 size = int_size_in_bytes (type);
3874 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3877 if (VECTOR_MODE_P (mode) || mode == TImode)
3879 /* User-created vectors small enough to fit in EAX. */
3883 /* MMX/3dNow values are returned in MM0,
3884 except when it doesn't exits. */
3886 return (TARGET_MMX ? 0 : 1);
3888 /* SSE values are returned in XMM0, except when it doesn't exist. */
3890 return (TARGET_SSE ? 0 : 1);
3904 /* When returning SSE vector types, we have a choice of either
3905 (1) being abi incompatible with a -march switch, or
3906 (2) generating an error.
3907 Given no good solution, I think the safest thing is one warning.
3908 The user won't be able to use -Werror, but....
3910 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3911 called in response to actually generating a caller or callee that
3912 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3913 via aggregate_value_p for general type probing from tree-ssa. */
3916 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3918 static bool warnedsse, warnedmmx;
3922 /* Look at the return type of the function, not the function type. */
3923 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3925 if (!TARGET_SSE && !warnedsse)
3928 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3931 warning (0, "SSE vector return without SSE enabled "
3936 if (!TARGET_MMX && !warnedmmx)
3938 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3941 warning (0, "MMX vector return without MMX enabled "
3950 /* Define how to find the value returned by a library function
3951 assuming the value has mode MODE. */
3953 ix86_libcall_value (enum machine_mode mode)
3967 return gen_rtx_REG (mode, FIRST_SSE_REG);
3970 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3974 return gen_rtx_REG (mode, 0);
3978 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3981 /* Given a mode, return the register to use for a return value. */
3984 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3986 gcc_assert (!TARGET_64BIT);
3988 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3989 we normally prevent this case when mmx is not available. However
3990 some ABIs may require the result to be returned like DImode. */
3991 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3992 return TARGET_MMX ? FIRST_MMX_REG : 0;
3994 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3995 we prevent this case when sse is not available. However some ABIs
3996 may require the result to be returned like integer TImode. */
3997 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3998 return TARGET_SSE ? FIRST_SSE_REG : 0;
4000 /* Decimal floating point values can go in %eax, unlike other float modes. */
4001 if (DECIMAL_FLOAT_MODE_P (mode))
4004 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4005 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4008 /* Floating point return values in %st(0), except for local functions when
4009 SSE math is enabled or for functions with sseregparm attribute. */
4010 if ((func || fntype)
4011 && (mode == SFmode || mode == DFmode))
4013 int sse_level = ix86_function_sseregparm (fntype, func);
4014 if ((sse_level >= 1 && mode == SFmode)
4015 || (sse_level == 2 && mode == DFmode))
4016 return FIRST_SSE_REG;
4019 return FIRST_FLOAT_REG;
4022 /* Create the va_list data type. */
4025 ix86_build_builtin_va_list (void)
4027 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4029 /* For i386 we use plain pointer to argument area. */
4031 return build_pointer_type (char_type_node);
4033 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4034 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4036 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4037 unsigned_type_node);
4038 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4039 unsigned_type_node);
4040 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4042 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4045 va_list_gpr_counter_field = f_gpr;
4046 va_list_fpr_counter_field = f_fpr;
4048 DECL_FIELD_CONTEXT (f_gpr) = record;
4049 DECL_FIELD_CONTEXT (f_fpr) = record;
4050 DECL_FIELD_CONTEXT (f_ovf) = record;
4051 DECL_FIELD_CONTEXT (f_sav) = record;
4053 TREE_CHAIN (record) = type_decl;
4054 TYPE_NAME (record) = type_decl;
4055 TYPE_FIELDS (record) = f_gpr;
4056 TREE_CHAIN (f_gpr) = f_fpr;
4057 TREE_CHAIN (f_fpr) = f_ovf;
4058 TREE_CHAIN (f_ovf) = f_sav;
4060 layout_type (record);
4062 /* The correct type is an array type of one element. */
4063 return build_array_type (record, build_index_type (size_zero_node));
4066 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4069 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4070 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4073 CUMULATIVE_ARGS next_cum;
4074 rtx save_area = NULL_RTX, mem;
4087 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4090 /* Indicate to allocate space on the stack for varargs save area. */
4091 ix86_save_varrargs_registers = 1;
4093 cfun->stack_alignment_needed = 128;
4095 fntype = TREE_TYPE (current_function_decl);
4096 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4097 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4098 != void_type_node));
4100 /* For varargs, we do not want to skip the dummy va_dcl argument.
4101 For stdargs, we do want to skip the last named argument. */
4104 function_arg_advance (&next_cum, mode, type, 1);
4107 save_area = frame_pointer_rtx;
4109 set = get_varargs_alias_set ();
4111 for (i = next_cum.regno;
4113 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4116 mem = gen_rtx_MEM (Pmode,
4117 plus_constant (save_area, i * UNITS_PER_WORD));
4118 MEM_NOTRAP_P (mem) = 1;
4119 set_mem_alias_set (mem, set);
4120 emit_move_insn (mem, gen_rtx_REG (Pmode,
4121 x86_64_int_parameter_registers[i]));
4124 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4126 /* Now emit code to save SSE registers. The AX parameter contains number
4127 of SSE parameter registers used to call this function. We use
4128 sse_prologue_save insn template that produces computed jump across
4129 SSE saves. We need some preparation work to get this working. */
4131 label = gen_label_rtx ();
4132 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4134 /* Compute address to jump to :
4135 label - 5*eax + nnamed_sse_arguments*5 */
4136 tmp_reg = gen_reg_rtx (Pmode);
4137 nsse_reg = gen_reg_rtx (Pmode);
4138 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4139 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4140 gen_rtx_MULT (Pmode, nsse_reg,
4142 if (next_cum.sse_regno)
4145 gen_rtx_CONST (DImode,
4146 gen_rtx_PLUS (DImode,
4148 GEN_INT (next_cum.sse_regno * 4))));
4150 emit_move_insn (nsse_reg, label_ref);
4151 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4153 /* Compute address of memory block we save into. We always use pointer
4154 pointing 127 bytes after first byte to store - this is needed to keep
4155 instruction size limited by 4 bytes. */
4156 tmp_reg = gen_reg_rtx (Pmode);
4157 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4158 plus_constant (save_area,
4159 8 * REGPARM_MAX + 127)));
4160 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4161 MEM_NOTRAP_P (mem) = 1;
4162 set_mem_alias_set (mem, set);
4163 set_mem_align (mem, BITS_PER_WORD);
4165 /* And finally do the dirty job! */
4166 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4167 GEN_INT (next_cum.sse_regno), label));
4172 /* Implement va_start. */
4175 ix86_va_start (tree valist, rtx nextarg)
4177 HOST_WIDE_INT words, n_gpr, n_fpr;
4178 tree f_gpr, f_fpr, f_ovf, f_sav;
4179 tree gpr, fpr, ovf, sav, t;
4182 /* Only 64bit target needs something special. */
4185 std_expand_builtin_va_start (valist, nextarg);
4189 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4190 f_fpr = TREE_CHAIN (f_gpr);
4191 f_ovf = TREE_CHAIN (f_fpr);
4192 f_sav = TREE_CHAIN (f_ovf);
4194 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4195 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4196 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4197 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4198 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4200 /* Count number of gp and fp argument registers used. */
4201 words = current_function_args_info.words;
4202 n_gpr = current_function_args_info.regno;
4203 n_fpr = current_function_args_info.sse_regno;
4205 if (TARGET_DEBUG_ARG)
4206 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4207 (int) words, (int) n_gpr, (int) n_fpr);
4209 if (cfun->va_list_gpr_size)
4211 type = TREE_TYPE (gpr);
4212 t = build2 (MODIFY_EXPR, type, gpr,
4213 build_int_cst (type, n_gpr * 8));
4214 TREE_SIDE_EFFECTS (t) = 1;
4215 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4218 if (cfun->va_list_fpr_size)
4220 type = TREE_TYPE (fpr);
4221 t = build2 (MODIFY_EXPR, type, fpr,
4222 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4223 TREE_SIDE_EFFECTS (t) = 1;
4224 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4227 /* Find the overflow area. */
4228 type = TREE_TYPE (ovf);
4229 t = make_tree (type, virtual_incoming_args_rtx);
4231 t = build2 (PLUS_EXPR, type, t,
4232 build_int_cst (type, words * UNITS_PER_WORD));
4233 t = build2 (MODIFY_EXPR, type, ovf, t);
4234 TREE_SIDE_EFFECTS (t) = 1;
4235 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4237 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4239 /* Find the register save area.
4240 Prologue of the function save it right above stack frame. */
4241 type = TREE_TYPE (sav);
4242 t = make_tree (type, frame_pointer_rtx);
4243 t = build2 (MODIFY_EXPR, type, sav, t);
4244 TREE_SIDE_EFFECTS (t) = 1;
4245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4249 /* Implement va_arg. */
4252 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4254 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4255 tree f_gpr, f_fpr, f_ovf, f_sav;
4256 tree gpr, fpr, ovf, sav, t;
4258 tree lab_false, lab_over = NULL_TREE;
4263 enum machine_mode nat_mode;
4265 /* Only 64bit target needs something special. */
4267 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4269 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4270 f_fpr = TREE_CHAIN (f_gpr);
4271 f_ovf = TREE_CHAIN (f_fpr);
4272 f_sav = TREE_CHAIN (f_ovf);
4274 valist = build_va_arg_indirect_ref (valist);
4275 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4276 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4277 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4278 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4280 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4282 type = build_pointer_type (type);
4283 size = int_size_in_bytes (type);
4284 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4286 nat_mode = type_natural_mode (type);
4287 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4288 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4290 /* Pull the value out of the saved registers. */
4292 addr = create_tmp_var (ptr_type_node, "addr");
4293 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4297 int needed_intregs, needed_sseregs;
4299 tree int_addr, sse_addr;
4301 lab_false = create_artificial_label ();
4302 lab_over = create_artificial_label ();
4304 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4306 need_temp = (!REG_P (container)
4307 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4308 || TYPE_ALIGN (type) > 128));
4310 /* In case we are passing structure, verify that it is consecutive block
4311 on the register save area. If not we need to do moves. */
4312 if (!need_temp && !REG_P (container))
4314 /* Verify that all registers are strictly consecutive */
4315 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4319 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4321 rtx slot = XVECEXP (container, 0, i);
4322 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4323 || INTVAL (XEXP (slot, 1)) != i * 16)
4331 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4333 rtx slot = XVECEXP (container, 0, i);
4334 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4335 || INTVAL (XEXP (slot, 1)) != i * 8)
4347 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4348 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4349 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4350 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4353 /* First ensure that we fit completely in registers. */
4356 t = build_int_cst (TREE_TYPE (gpr),
4357 (REGPARM_MAX - needed_intregs + 1) * 8);
4358 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4359 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4360 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4361 gimplify_and_add (t, pre_p);
4365 t = build_int_cst (TREE_TYPE (fpr),
4366 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4368 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4369 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4370 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4371 gimplify_and_add (t, pre_p);
4374 /* Compute index to start of area used for integer regs. */
4377 /* int_addr = gpr + sav; */
4378 t = fold_convert (ptr_type_node, gpr);
4379 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4380 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4381 gimplify_and_add (t, pre_p);
4385 /* sse_addr = fpr + sav; */
4386 t = fold_convert (ptr_type_node, fpr);
4387 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4388 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4389 gimplify_and_add (t, pre_p);
4394 tree temp = create_tmp_var (type, "va_arg_tmp");
4397 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4398 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4399 gimplify_and_add (t, pre_p);
4401 for (i = 0; i < XVECLEN (container, 0); i++)
4403 rtx slot = XVECEXP (container, 0, i);
4404 rtx reg = XEXP (slot, 0);
4405 enum machine_mode mode = GET_MODE (reg);
4406 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4407 tree addr_type = build_pointer_type (piece_type);
4410 tree dest_addr, dest;
4412 if (SSE_REGNO_P (REGNO (reg)))
4414 src_addr = sse_addr;
4415 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4419 src_addr = int_addr;
4420 src_offset = REGNO (reg) * 8;
4422 src_addr = fold_convert (addr_type, src_addr);
4423 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4424 size_int (src_offset)));
4425 src = build_va_arg_indirect_ref (src_addr);
4427 dest_addr = fold_convert (addr_type, addr);
4428 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4429 size_int (INTVAL (XEXP (slot, 1)))));
4430 dest = build_va_arg_indirect_ref (dest_addr);
4432 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4433 gimplify_and_add (t, pre_p);
4439 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4440 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4441 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4442 gimplify_and_add (t, pre_p);
4446 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4447 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4448 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4449 gimplify_and_add (t, pre_p);
4452 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4453 gimplify_and_add (t, pre_p);
4455 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4456 append_to_statement_list (t, pre_p);
4459 /* ... otherwise out of the overflow area. */
4461 /* Care for on-stack alignment if needed. */
4462 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4463 || integer_zerop (TYPE_SIZE (type)))
4467 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4468 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4469 build_int_cst (TREE_TYPE (ovf), align - 1));
4470 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4471 build_int_cst (TREE_TYPE (t), -align));
4473 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4475 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4476 gimplify_and_add (t2, pre_p);
4478 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4479 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4480 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4481 gimplify_and_add (t, pre_p);
4485 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4486 append_to_statement_list (t, pre_p);
4489 ptrtype = build_pointer_type (type);
4490 addr = fold_convert (ptrtype, addr);
4493 addr = build_va_arg_indirect_ref (addr);
4494 return build_va_arg_indirect_ref (addr);
4497 /* Return nonzero if OPNUM's MEM should be matched
4498 in movabs* patterns. */
4501 ix86_check_movabs (rtx insn, int opnum)
4505 set = PATTERN (insn);
4506 if (GET_CODE (set) == PARALLEL)
4507 set = XVECEXP (set, 0, 0);
4508 gcc_assert (GET_CODE (set) == SET);
4509 mem = XEXP (set, opnum);
4510 while (GET_CODE (mem) == SUBREG)
4511 mem = SUBREG_REG (mem);
4512 gcc_assert (GET_CODE (mem) == MEM);
4513 return (volatile_ok || !MEM_VOLATILE_P (mem));
4516 /* Initialize the table of extra 80387 mathematical constants. */
4519 init_ext_80387_constants (void)
4521 static const char * cst[5] =
4523 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4524 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4525 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4526 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4527 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4531 for (i = 0; i < 5; i++)
4533 real_from_string (&ext_80387_constants_table[i], cst[i]);
4534 /* Ensure each constant is rounded to XFmode precision. */
4535 real_convert (&ext_80387_constants_table[i],
4536 XFmode, &ext_80387_constants_table[i]);
4539 ext_80387_constants_init = 1;
4542 /* Return true if the constant is something that can be loaded with
4543 a special instruction. */
4546 standard_80387_constant_p (rtx x)
4548 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4551 if (x == CONST0_RTX (GET_MODE (x)))
4553 if (x == CONST1_RTX (GET_MODE (x)))
4556 /* For XFmode constants, try to find a special 80387 instruction when
4557 optimizing for size or on those CPUs that benefit from them. */
4558 if (GET_MODE (x) == XFmode
4559 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4564 if (! ext_80387_constants_init)
4565 init_ext_80387_constants ();
4567 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4568 for (i = 0; i < 5; i++)
4569 if (real_identical (&r, &ext_80387_constants_table[i]))
4576 /* Return the opcode of the special instruction to be used to load
4580 standard_80387_constant_opcode (rtx x)
4582 switch (standard_80387_constant_p (x))
4603 /* Return the CONST_DOUBLE representing the 80387 constant that is
4604 loaded by the specified special instruction. The argument IDX
4605 matches the return value from standard_80387_constant_p. */
4608 standard_80387_constant_rtx (int idx)
4612 if (! ext_80387_constants_init)
4613 init_ext_80387_constants ();
4629 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4633 /* Return 1 if mode is a valid mode for sse. */
4635 standard_sse_mode_p (enum machine_mode mode)
4652 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4655 standard_sse_constant_p (rtx x)
4657 enum machine_mode mode = GET_MODE (x);
4659 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4661 if (vector_all_ones_operand (x, mode)
4662 && standard_sse_mode_p (mode))
4663 return TARGET_SSE2 ? 2 : -1;
4668 /* Return the opcode of the special instruction to be used to load
4672 standard_sse_constant_opcode (rtx insn, rtx x)
4674 switch (standard_sse_constant_p (x))
4677 if (get_attr_mode (insn) == MODE_V4SF)
4678 return "xorps\t%0, %0";
4679 else if (get_attr_mode (insn) == MODE_V2DF)
4680 return "xorpd\t%0, %0";
4682 return "pxor\t%0, %0";
4684 return "pcmpeqd\t%0, %0";
4689 /* Returns 1 if OP contains a symbol reference */
4692 symbolic_reference_mentioned_p (rtx op)
4697 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4700 fmt = GET_RTX_FORMAT (GET_CODE (op));
4701 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4707 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4708 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4712 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4719 /* Return 1 if it is appropriate to emit `ret' instructions in the
4720 body of a function. Do this only if the epilogue is simple, needing a
4721 couple of insns. Prior to reloading, we can't tell how many registers
4722 must be saved, so return 0 then. Return 0 if there is no frame
4723 marker to de-allocate. */
4726 ix86_can_use_return_insn_p (void)
4728 struct ix86_frame frame;
4730 if (! reload_completed || frame_pointer_needed)
4733 /* Don't allow more than 32 pop, since that's all we can do
4734 with one instruction. */
4735 if (current_function_pops_args
4736 && current_function_args_size >= 32768)
4739 ix86_compute_frame_layout (&frame);
4740 return frame.to_allocate == 0 && frame.nregs == 0;
4743 /* Value should be nonzero if functions must have frame pointers.
4744 Zero means the frame pointer need not be set up (and parms may
4745 be accessed via the stack pointer) in functions that seem suitable. */
4748 ix86_frame_pointer_required (void)
4750 /* If we accessed previous frames, then the generated code expects
4751 to be able to access the saved ebp value in our frame. */
4752 if (cfun->machine->accesses_prev_frame)
4755 /* Several x86 os'es need a frame pointer for other reasons,
4756 usually pertaining to setjmp. */
4757 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4760 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4761 the frame pointer by default. Turn it back on now if we've not
4762 got a leaf function. */
4763 if (TARGET_OMIT_LEAF_FRAME_POINTER
4764 && (!current_function_is_leaf
4765 || ix86_current_function_calls_tls_descriptor))
4768 if (current_function_profile)
4774 /* Record that the current function accesses previous call frames. */
4777 ix86_setup_frame_addresses (void)
4779 cfun->machine->accesses_prev_frame = 1;
4782 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4783 # define USE_HIDDEN_LINKONCE 1
4785 # define USE_HIDDEN_LINKONCE 0
4788 static int pic_labels_used;
4790 /* Fills in the label name that should be used for a pc thunk for
4791 the given register. */
4794 get_pc_thunk_name (char name[32], unsigned int regno)
4796 gcc_assert (!TARGET_64BIT);
4798 if (USE_HIDDEN_LINKONCE)
4799 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4801 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4805 /* This function generates code for -fpic that loads %ebx with
4806 the return address of the caller and then returns. */
4809 ix86_file_end (void)
4814 for (regno = 0; regno < 8; ++regno)
4818 if (! ((pic_labels_used >> regno) & 1))
4821 get_pc_thunk_name (name, regno);
4826 switch_to_section (darwin_sections[text_coal_section]);
4827 fputs ("\t.weak_definition\t", asm_out_file);
4828 assemble_name (asm_out_file, name);
4829 fputs ("\n\t.private_extern\t", asm_out_file);
4830 assemble_name (asm_out_file, name);
4831 fputs ("\n", asm_out_file);
4832 ASM_OUTPUT_LABEL (asm_out_file, name);
4836 if (USE_HIDDEN_LINKONCE)
4840 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4842 TREE_PUBLIC (decl) = 1;
4843 TREE_STATIC (decl) = 1;
4844 DECL_ONE_ONLY (decl) = 1;
4846 (*targetm.asm_out.unique_section) (decl, 0);
4847 switch_to_section (get_named_section (decl, NULL, 0));
4849 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4850 fputs ("\t.hidden\t", asm_out_file);
4851 assemble_name (asm_out_file, name);
4852 fputc ('\n', asm_out_file);
4853 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4857 switch_to_section (text_section);
4858 ASM_OUTPUT_LABEL (asm_out_file, name);
4861 xops[0] = gen_rtx_REG (SImode, regno);
4862 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4863 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4864 output_asm_insn ("ret", xops);
4867 if (NEED_INDICATE_EXEC_STACK)
4868 file_end_indicate_exec_stack ();
4871 /* Emit code for the SET_GOT patterns. */
4874 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4879 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4881 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4883 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4886 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4888 output_asm_insn ("call\t%a2", xops);
4891 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4892 is what will be referenced by the Mach-O PIC subsystem. */
4894 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4897 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4898 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4901 output_asm_insn ("pop{l}\t%0", xops);
4906 get_pc_thunk_name (name, REGNO (dest));
4907 pic_labels_used |= 1 << REGNO (dest);
4909 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4910 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4911 output_asm_insn ("call\t%X2", xops);
4912 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4913 is what will be referenced by the Mach-O PIC subsystem. */
4916 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4918 targetm.asm_out.internal_label (asm_out_file, "L",
4919 CODE_LABEL_NUMBER (label));
4926 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4927 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4929 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4934 /* Generate an "push" pattern for input ARG. */
4939 return gen_rtx_SET (VOIDmode,
4941 gen_rtx_PRE_DEC (Pmode,
4942 stack_pointer_rtx)),
4946 /* Return >= 0 if there is an unused call-clobbered register available
4947 for the entire function. */
4950 ix86_select_alt_pic_regnum (void)
4952 if (current_function_is_leaf && !current_function_profile
4953 && !ix86_current_function_calls_tls_descriptor)
4956 for (i = 2; i >= 0; --i)
4957 if (!regs_ever_live[i])
4961 return INVALID_REGNUM;
4964 /* Return 1 if we need to save REGNO. */
4966 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4968 if (pic_offset_table_rtx
4969 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4970 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4971 || current_function_profile
4972 || current_function_calls_eh_return
4973 || current_function_uses_const_pool))
4975 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4980 if (current_function_calls_eh_return && maybe_eh_return)
4985 unsigned test = EH_RETURN_DATA_REGNO (i);
4986 if (test == INVALID_REGNUM)
4993 if (cfun->machine->force_align_arg_pointer
4994 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4997 return (regs_ever_live[regno]
4998 && !call_used_regs[regno]
4999 && !fixed_regs[regno]
5000 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5003 /* Return number of registers to be saved on the stack. */
5006 ix86_nsaved_regs (void)
5011 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5012 if (ix86_save_reg (regno, true))
5017 /* Return the offset between two registers, one to be eliminated, and the other
5018 its replacement, at the start of a routine. */
5021 ix86_initial_elimination_offset (int from, int to)
5023 struct ix86_frame frame;
5024 ix86_compute_frame_layout (&frame);
5026 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5027 return frame.hard_frame_pointer_offset;
5028 else if (from == FRAME_POINTER_REGNUM
5029 && to == HARD_FRAME_POINTER_REGNUM)
5030 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5033 gcc_assert (to == STACK_POINTER_REGNUM);
5035 if (from == ARG_POINTER_REGNUM)
5036 return frame.stack_pointer_offset;
5038 gcc_assert (from == FRAME_POINTER_REGNUM);
5039 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5043 /* Fill structure ix86_frame about frame of currently computed function. */
5046 ix86_compute_frame_layout (struct ix86_frame *frame)
5048 HOST_WIDE_INT total_size;
5049 unsigned int stack_alignment_needed;
5050 HOST_WIDE_INT offset;
5051 unsigned int preferred_alignment;
5052 HOST_WIDE_INT size = get_frame_size ();
5054 frame->nregs = ix86_nsaved_regs ();
5057 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5058 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5060 /* During reload iteration the amount of registers saved can change.
5061 Recompute the value as needed. Do not recompute when amount of registers
5062 didn't change as reload does multiple calls to the function and does not
5063 expect the decision to change within single iteration. */
5065 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5067 int count = frame->nregs;
5069 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5070 /* The fast prologue uses move instead of push to save registers. This
5071 is significantly longer, but also executes faster as modern hardware
5072 can execute the moves in parallel, but can't do that for push/pop.
5074 Be careful about choosing what prologue to emit: When function takes
5075 many instructions to execute we may use slow version as well as in
5076 case function is known to be outside hot spot (this is known with
5077 feedback only). Weight the size of function by number of registers
5078 to save as it is cheap to use one or two push instructions but very
5079 slow to use many of them. */
5081 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5082 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5083 || (flag_branch_probabilities
5084 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5085 cfun->machine->use_fast_prologue_epilogue = false;
5087 cfun->machine->use_fast_prologue_epilogue
5088 = !expensive_function_p (count);
5090 if (TARGET_PROLOGUE_USING_MOVE
5091 && cfun->machine->use_fast_prologue_epilogue)
5092 frame->save_regs_using_mov = true;
5094 frame->save_regs_using_mov = false;
5097 /* Skip return address and saved base pointer. */
5098 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5100 frame->hard_frame_pointer_offset = offset;
5102 /* Do some sanity checking of stack_alignment_needed and
5103 preferred_alignment, since i386 port is the only using those features
5104 that may break easily. */
5106 gcc_assert (!size || stack_alignment_needed);
5107 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5108 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5109 gcc_assert (stack_alignment_needed
5110 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5112 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5113 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5115 /* Register save area */
5116 offset += frame->nregs * UNITS_PER_WORD;
5119 if (ix86_save_varrargs_registers)
5121 offset += X86_64_VARARGS_SIZE;
5122 frame->va_arg_size = X86_64_VARARGS_SIZE;
5125 frame->va_arg_size = 0;
5127 /* Align start of frame for local function. */
5128 frame->padding1 = ((offset + stack_alignment_needed - 1)
5129 & -stack_alignment_needed) - offset;
5131 offset += frame->padding1;
5133 /* Frame pointer points here. */
5134 frame->frame_pointer_offset = offset;
5138 /* Add outgoing arguments area. Can be skipped if we eliminated
5139 all the function calls as dead code.
5140 Skipping is however impossible when function calls alloca. Alloca
5141 expander assumes that last current_function_outgoing_args_size
5142 of stack frame are unused. */
5143 if (ACCUMULATE_OUTGOING_ARGS
5144 && (!current_function_is_leaf || current_function_calls_alloca
5145 || ix86_current_function_calls_tls_descriptor))
5147 offset += current_function_outgoing_args_size;
5148 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5151 frame->outgoing_arguments_size = 0;
5153 /* Align stack boundary. Only needed if we're calling another function
5155 if (!current_function_is_leaf || current_function_calls_alloca
5156 || ix86_current_function_calls_tls_descriptor)
5157 frame->padding2 = ((offset + preferred_alignment - 1)
5158 & -preferred_alignment) - offset;
5160 frame->padding2 = 0;
5162 offset += frame->padding2;
5164 /* We've reached end of stack frame. */
5165 frame->stack_pointer_offset = offset;
5167 /* Size prologue needs to allocate. */
5168 frame->to_allocate =
5169 (size + frame->padding1 + frame->padding2
5170 + frame->outgoing_arguments_size + frame->va_arg_size);
5172 if ((!frame->to_allocate && frame->nregs <= 1)
5173 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5174 frame->save_regs_using_mov = false;
5176 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5177 && current_function_is_leaf
5178 && !ix86_current_function_calls_tls_descriptor)
5180 frame->red_zone_size = frame->to_allocate;
5181 if (frame->save_regs_using_mov)
5182 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5183 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5184 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5187 frame->red_zone_size = 0;
5188 frame->to_allocate -= frame->red_zone_size;
5189 frame->stack_pointer_offset -= frame->red_zone_size;
5191 fprintf (stderr, "nregs: %i\n", frame->nregs);
5192 fprintf (stderr, "size: %i\n", size);
5193 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5194 fprintf (stderr, "padding1: %i\n", frame->padding1);
5195 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5196 fprintf (stderr, "padding2: %i\n", frame->padding2);
5197 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5198 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5199 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5200 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5201 frame->hard_frame_pointer_offset);
5202 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5206 /* Emit code to save registers in the prologue. */
5209 ix86_emit_save_regs (void)
5214 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5215 if (ix86_save_reg (regno, true))
5217 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5218 RTX_FRAME_RELATED_P (insn) = 1;
5222 /* Emit code to save registers using MOV insns. First register
5223 is restored from POINTER + OFFSET. */
5225 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5230 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5231 if (ix86_save_reg (regno, true))
5233 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5235 gen_rtx_REG (Pmode, regno));
5236 RTX_FRAME_RELATED_P (insn) = 1;
5237 offset += UNITS_PER_WORD;
5241 /* Expand prologue or epilogue stack adjustment.
5242 The pattern exist to put a dependency on all ebp-based memory accesses.
5243 STYLE should be negative if instructions should be marked as frame related,
5244 zero if %r11 register is live and cannot be freely used and positive
5248 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5253 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5254 else if (x86_64_immediate_operand (offset, DImode))
5255 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5259 /* r11 is used by indirect sibcall return as well, set before the
5260 epilogue and used after the epilogue. ATM indirect sibcall
5261 shouldn't be used together with huge frame sizes in one
5262 function because of the frame_size check in sibcall.c. */
5264 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5265 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5267 RTX_FRAME_RELATED_P (insn) = 1;
5268 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5272 RTX_FRAME_RELATED_P (insn) = 1;
5275 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5278 ix86_internal_arg_pointer (void)
5280 bool has_force_align_arg_pointer =
5281 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5282 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5283 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5284 && DECL_NAME (current_function_decl)
5285 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5286 && DECL_FILE_SCOPE_P (current_function_decl))
5287 || ix86_force_align_arg_pointer
5288 || has_force_align_arg_pointer)
5290 /* Nested functions can't realign the stack due to a register
5292 if (DECL_CONTEXT (current_function_decl)
5293 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5295 if (ix86_force_align_arg_pointer)
5296 warning (0, "-mstackrealign ignored for nested functions");
5297 if (has_force_align_arg_pointer)
5298 error ("%s not supported for nested functions",
5299 ix86_force_align_arg_pointer_string);
5300 return virtual_incoming_args_rtx;
5302 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5303 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5306 return virtual_incoming_args_rtx;
5309 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5310 This is called from dwarf2out.c to emit call frame instructions
5311 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5313 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5315 rtx unspec = SET_SRC (pattern);
5316 gcc_assert (GET_CODE (unspec) == UNSPEC);
5320 case UNSPEC_REG_SAVE:
5321 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5322 SET_DEST (pattern));
5324 case UNSPEC_DEF_CFA:
5325 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5326 INTVAL (XVECEXP (unspec, 0, 0)));
5333 /* Expand the prologue into a bunch of separate insns. */
5336 ix86_expand_prologue (void)
5340 struct ix86_frame frame;
5341 HOST_WIDE_INT allocate;
5343 ix86_compute_frame_layout (&frame);
5345 if (cfun->machine->force_align_arg_pointer)
5349 /* Grab the argument pointer. */
5350 x = plus_constant (stack_pointer_rtx, 4);
5351 y = cfun->machine->force_align_arg_pointer;
5352 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5353 RTX_FRAME_RELATED_P (insn) = 1;
5355 /* The unwind info consists of two parts: install the fafp as the cfa,
5356 and record the fafp as the "save register" of the stack pointer.
5357 The later is there in order that the unwinder can see where it
5358 should restore the stack pointer across the and insn. */
5359 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5360 x = gen_rtx_SET (VOIDmode, y, x);
5361 RTX_FRAME_RELATED_P (x) = 1;
5362 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5364 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5365 RTX_FRAME_RELATED_P (y) = 1;
5366 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5367 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5368 REG_NOTES (insn) = x;
5370 /* Align the stack. */
5371 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5374 /* And here we cheat like madmen with the unwind info. We force the
5375 cfa register back to sp+4, which is exactly what it was at the
5376 start of the function. Re-pushing the return address results in
5377 the return at the same spot relative to the cfa, and thus is
5378 correct wrt the unwind info. */
5379 x = cfun->machine->force_align_arg_pointer;
5380 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5381 insn = emit_insn (gen_push (x));
5382 RTX_FRAME_RELATED_P (insn) = 1;
5385 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5386 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5387 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5388 REG_NOTES (insn) = x;
5391 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5392 slower on all targets. Also sdb doesn't like it. */
5394 if (frame_pointer_needed)
5396 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5397 RTX_FRAME_RELATED_P (insn) = 1;
5399 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5400 RTX_FRAME_RELATED_P (insn) = 1;
5403 allocate = frame.to_allocate;
5405 if (!frame.save_regs_using_mov)
5406 ix86_emit_save_regs ();
5408 allocate += frame.nregs * UNITS_PER_WORD;
5410 /* When using red zone we may start register saving before allocating
5411 the stack frame saving one cycle of the prologue. */
5412 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5413 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5414 : stack_pointer_rtx,
5415 -frame.nregs * UNITS_PER_WORD);
5419 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5420 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5421 GEN_INT (-allocate), -1);
5424 /* Only valid for Win32. */
5425 rtx eax = gen_rtx_REG (SImode, 0);
5426 bool eax_live = ix86_eax_live_at_start_p ();
5429 gcc_assert (!TARGET_64BIT);
5433 emit_insn (gen_push (eax));
5437 emit_move_insn (eax, GEN_INT (allocate));
5439 insn = emit_insn (gen_allocate_stack_worker (eax));
5440 RTX_FRAME_RELATED_P (insn) = 1;
5441 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5442 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5443 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5444 t, REG_NOTES (insn));
5448 if (frame_pointer_needed)
5449 t = plus_constant (hard_frame_pointer_rtx,
5452 - frame.nregs * UNITS_PER_WORD);
5454 t = plus_constant (stack_pointer_rtx, allocate);
5455 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5459 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5461 if (!frame_pointer_needed || !frame.to_allocate)
5462 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5464 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5465 -frame.nregs * UNITS_PER_WORD);
5468 pic_reg_used = false;
5469 if (pic_offset_table_rtx
5470 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5471 || current_function_profile))
5473 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5475 if (alt_pic_reg_used != INVALID_REGNUM)
5476 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5478 pic_reg_used = true;
5484 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5486 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5488 /* Even with accurate pre-reload life analysis, we can wind up
5489 deleting all references to the pic register after reload.
5490 Consider if cross-jumping unifies two sides of a branch
5491 controlled by a comparison vs the only read from a global.
5492 In which case, allow the set_got to be deleted, though we're
5493 too late to do anything about the ebx save in the prologue. */
5494 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5497 /* Prevent function calls from be scheduled before the call to mcount.
5498 In the pic_reg_used case, make sure that the got load isn't deleted. */
5499 if (current_function_profile)
5500 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5503 /* Emit code to restore saved registers using MOV insns. First register
5504 is restored from POINTER + OFFSET. */
5506 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5507 int maybe_eh_return)
5510 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5512 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5513 if (ix86_save_reg (regno, maybe_eh_return))
5515 /* Ensure that adjust_address won't be forced to produce pointer
5516 out of range allowed by x86-64 instruction set. */
5517 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5521 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5522 emit_move_insn (r11, GEN_INT (offset));
5523 emit_insn (gen_adddi3 (r11, r11, pointer));
5524 base_address = gen_rtx_MEM (Pmode, r11);
5527 emit_move_insn (gen_rtx_REG (Pmode, regno),
5528 adjust_address (base_address, Pmode, offset));
5529 offset += UNITS_PER_WORD;
5533 /* Restore function stack, frame, and registers. */
5536 ix86_expand_epilogue (int style)
5539 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5540 struct ix86_frame frame;
5541 HOST_WIDE_INT offset;
5543 ix86_compute_frame_layout (&frame);
5545 /* Calculate start of saved registers relative to ebp. Special care
5546 must be taken for the normal return case of a function using
5547 eh_return: the eax and edx registers are marked as saved, but not
5548 restored along this path. */
5549 offset = frame.nregs;
5550 if (current_function_calls_eh_return && style != 2)
5552 offset *= -UNITS_PER_WORD;
5554 /* If we're only restoring one register and sp is not valid then
5555 using a move instruction to restore the register since it's
5556 less work than reloading sp and popping the register.
5558 The default code result in stack adjustment using add/lea instruction,
5559 while this code results in LEAVE instruction (or discrete equivalent),
5560 so it is profitable in some other cases as well. Especially when there
5561 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5562 and there is exactly one register to pop. This heuristic may need some
5563 tuning in future. */
5564 if ((!sp_valid && frame.nregs <= 1)
5565 || (TARGET_EPILOGUE_USING_MOVE
5566 && cfun->machine->use_fast_prologue_epilogue
5567 && (frame.nregs > 1 || frame.to_allocate))
5568 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5569 || (frame_pointer_needed && TARGET_USE_LEAVE
5570 && cfun->machine->use_fast_prologue_epilogue
5571 && frame.nregs == 1)
5572 || current_function_calls_eh_return)
5574 /* Restore registers. We can use ebp or esp to address the memory
5575 locations. If both are available, default to ebp, since offsets
5576 are known to be small. Only exception is esp pointing directly to the
5577 end of block of saved registers, where we may simplify addressing
5580 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5581 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5582 frame.to_allocate, style == 2);
5584 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5585 offset, style == 2);
5587 /* eh_return epilogues need %ecx added to the stack pointer. */
5590 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5592 if (frame_pointer_needed)
5594 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5595 tmp = plus_constant (tmp, UNITS_PER_WORD);
5596 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5598 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5599 emit_move_insn (hard_frame_pointer_rtx, tmp);
5601 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5606 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5607 tmp = plus_constant (tmp, (frame.to_allocate
5608 + frame.nregs * UNITS_PER_WORD));
5609 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5612 else if (!frame_pointer_needed)
5613 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5614 GEN_INT (frame.to_allocate
5615 + frame.nregs * UNITS_PER_WORD),
5617 /* If not an i386, mov & pop is faster than "leave". */
5618 else if (TARGET_USE_LEAVE || optimize_size
5619 || !cfun->machine->use_fast_prologue_epilogue)
5620 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5623 pro_epilogue_adjust_stack (stack_pointer_rtx,
5624 hard_frame_pointer_rtx,
5627 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5629 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5634 /* First step is to deallocate the stack frame so that we can
5635 pop the registers. */
5638 gcc_assert (frame_pointer_needed);
5639 pro_epilogue_adjust_stack (stack_pointer_rtx,
5640 hard_frame_pointer_rtx,
5641 GEN_INT (offset), style);
5643 else if (frame.to_allocate)
5644 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5645 GEN_INT (frame.to_allocate), style);
5647 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5648 if (ix86_save_reg (regno, false))
5651 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5653 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5655 if (frame_pointer_needed)
5657 /* Leave results in shorter dependency chains on CPUs that are
5658 able to grok it fast. */
5659 if (TARGET_USE_LEAVE)
5660 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5661 else if (TARGET_64BIT)
5662 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5664 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5668 if (cfun->machine->force_align_arg_pointer)
5670 emit_insn (gen_addsi3 (stack_pointer_rtx,
5671 cfun->machine->force_align_arg_pointer,
5675 /* Sibcall epilogues don't want a return instruction. */
5679 if (current_function_pops_args && current_function_args_size)
5681 rtx popc = GEN_INT (current_function_pops_args);
5683 /* i386 can only pop 64K bytes. If asked to pop more, pop
5684 return address, do explicit add, and jump indirectly to the
5687 if (current_function_pops_args >= 65536)
5689 rtx ecx = gen_rtx_REG (SImode, 2);
5691 /* There is no "pascal" calling convention in 64bit ABI. */
5692 gcc_assert (!TARGET_64BIT);
5694 emit_insn (gen_popsi1 (ecx));
5695 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5696 emit_jump_insn (gen_return_indirect_internal (ecx));
5699 emit_jump_insn (gen_return_pop_internal (popc));
5702 emit_jump_insn (gen_return_internal ());
5705 /* Reset from the function's potential modifications. */
5708 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5709 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5711 if (pic_offset_table_rtx)
5712 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5714 /* Mach-O doesn't support labels at the end of objects, so if
5715 it looks like we might want one, insert a NOP. */
5717 rtx insn = get_last_insn ();
5720 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5721 insn = PREV_INSN (insn);
5725 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5726 fputs ("\tnop\n", file);
5732 /* Extract the parts of an RTL expression that is a valid memory address
5733 for an instruction. Return 0 if the structure of the address is
5734 grossly off. Return -1 if the address contains ASHIFT, so it is not
5735 strictly valid, but still used for computing length of lea instruction. */
5738 ix86_decompose_address (rtx addr, struct ix86_address *out)
5740 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5741 rtx base_reg, index_reg;
5742 HOST_WIDE_INT scale = 1;
5743 rtx scale_rtx = NULL_RTX;
5745 enum ix86_address_seg seg = SEG_DEFAULT;
5747 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5749 else if (GET_CODE (addr) == PLUS)
5759 addends[n++] = XEXP (op, 1);
5762 while (GET_CODE (op) == PLUS);
5767 for (i = n; i >= 0; --i)
5770 switch (GET_CODE (op))
5775 index = XEXP (op, 0);
5776 scale_rtx = XEXP (op, 1);
5780 if (XINT (op, 1) == UNSPEC_TP
5781 && TARGET_TLS_DIRECT_SEG_REFS
5782 && seg == SEG_DEFAULT)
5783 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5812 else if (GET_CODE (addr) == MULT)
5814 index = XEXP (addr, 0); /* index*scale */
5815 scale_rtx = XEXP (addr, 1);
5817 else if (GET_CODE (addr) == ASHIFT)
5821 /* We're called for lea too, which implements ashift on occasion. */
5822 index = XEXP (addr, 0);
5823 tmp = XEXP (addr, 1);
5824 if (GET_CODE (tmp) != CONST_INT)
5826 scale = INTVAL (tmp);
5827 if ((unsigned HOST_WIDE_INT) scale > 3)
5833 disp = addr; /* displacement */
5835 /* Extract the integral value of scale. */
5838 if (GET_CODE (scale_rtx) != CONST_INT)
5840 scale = INTVAL (scale_rtx);
5843 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5844 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5846 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5847 if (base_reg && index_reg && scale == 1
5848 && (index_reg == arg_pointer_rtx
5849 || index_reg == frame_pointer_rtx
5850 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5853 tmp = base, base = index, index = tmp;
5854 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5857 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5858 if ((base_reg == hard_frame_pointer_rtx
5859 || base_reg == frame_pointer_rtx
5860 || base_reg == arg_pointer_rtx) && !disp)
5863 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5864 Avoid this by transforming to [%esi+0]. */
5865 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5866 && base_reg && !index_reg && !disp
5868 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5871 /* Special case: encode reg+reg instead of reg*2. */
5872 if (!base && index && scale && scale == 2)
5873 base = index, base_reg = index_reg, scale = 1;
5875 /* Special case: scaling cannot be encoded without base or displacement. */
5876 if (!base && !disp && index && scale != 1)
5888 /* Return cost of the memory address x.
5889 For i386, it is better to use a complex address than let gcc copy
5890 the address into a reg and make a new pseudo. But not if the address
5891 requires to two regs - that would mean more pseudos with longer
5894 ix86_address_cost (rtx x)
5896 struct ix86_address parts;
5898 int ok = ix86_decompose_address (x, &parts);
5902 if (parts.base && GET_CODE (parts.base) == SUBREG)
5903 parts.base = SUBREG_REG (parts.base);
5904 if (parts.index && GET_CODE (parts.index) == SUBREG)
5905 parts.index = SUBREG_REG (parts.index);
5907 /* More complex memory references are better. */
5908 if (parts.disp && parts.disp != const0_rtx)
5910 if (parts.seg != SEG_DEFAULT)
5913 /* Attempt to minimize number of registers in the address. */
5915 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5917 && (!REG_P (parts.index)
5918 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5922 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5924 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5925 && parts.base != parts.index)
5928 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5929 since it's predecode logic can't detect the length of instructions
5930 and it degenerates to vector decoded. Increase cost of such
5931 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5932 to split such addresses or even refuse such addresses at all.
5934 Following addressing modes are affected:
5939 The first and last case may be avoidable by explicitly coding the zero in
5940 memory address, but I don't have AMD-K6 machine handy to check this
5944 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5945 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5946 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5952 /* If X is a machine specific address (i.e. a symbol or label being
5953 referenced as a displacement from the GOT implemented using an
5954 UNSPEC), then return the base term. Otherwise return X. */
5957 ix86_find_base_term (rtx x)
5963 if (GET_CODE (x) != CONST)
5966 if (GET_CODE (term) == PLUS
5967 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5968 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5969 term = XEXP (term, 0);
5970 if (GET_CODE (term) != UNSPEC
5971 || XINT (term, 1) != UNSPEC_GOTPCREL)
5974 term = XVECEXP (term, 0, 0);
5976 if (GET_CODE (term) != SYMBOL_REF
5977 && GET_CODE (term) != LABEL_REF)
5983 term = ix86_delegitimize_address (x);
5985 if (GET_CODE (term) != SYMBOL_REF
5986 && GET_CODE (term) != LABEL_REF)
5992 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5993 this is used for to form addresses to local data when -fPIC is in
5997 darwin_local_data_pic (rtx disp)
5999 if (GET_CODE (disp) == MINUS)
6001 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6002 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6003 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6005 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6006 if (! strcmp (sym_name, "<pic base>"))
6014 /* Determine if a given RTX is a valid constant. We already know this
6015 satisfies CONSTANT_P. */
6018 legitimate_constant_p (rtx x)
6020 switch (GET_CODE (x))
6025 if (GET_CODE (x) == PLUS)
6027 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6032 if (TARGET_MACHO && darwin_local_data_pic (x))
6035 /* Only some unspecs are valid as "constants". */
6036 if (GET_CODE (x) == UNSPEC)
6037 switch (XINT (x, 1))
6040 return TARGET_64BIT;
6043 x = XVECEXP (x, 0, 0);
6044 return (GET_CODE (x) == SYMBOL_REF
6045 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6047 x = XVECEXP (x, 0, 0);
6048 return (GET_CODE (x) == SYMBOL_REF
6049 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6054 /* We must have drilled down to a symbol. */
6055 if (GET_CODE (x) == LABEL_REF)
6057 if (GET_CODE (x) != SYMBOL_REF)
6062 /* TLS symbols are never valid. */
6063 if (SYMBOL_REF_TLS_MODEL (x))
6068 if (GET_MODE (x) == TImode
6069 && x != CONST0_RTX (TImode)
6075 if (x == CONST0_RTX (GET_MODE (x)))
6083 /* Otherwise we handle everything else in the move patterns. */
6087 /* Determine if it's legal to put X into the constant pool. This
6088 is not possible for the address of thread-local symbols, which
6089 is checked above. */
6092 ix86_cannot_force_const_mem (rtx x)
6094 /* We can always put integral constants and vectors in memory. */
6095 switch (GET_CODE (x))
6105 return !legitimate_constant_p (x);
6108 /* Determine if a given RTX is a valid constant address. */
6111 constant_address_p (rtx x)
6113 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6116 /* Nonzero if the constant value X is a legitimate general operand
6117 when generating PIC code. It is given that flag_pic is on and
6118 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6121 legitimate_pic_operand_p (rtx x)
6125 switch (GET_CODE (x))
6128 inner = XEXP (x, 0);
6129 if (GET_CODE (inner) == PLUS
6130 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6131 inner = XEXP (inner, 0);
6133 /* Only some unspecs are valid as "constants". */
6134 if (GET_CODE (inner) == UNSPEC)
6135 switch (XINT (inner, 1))
6138 return TARGET_64BIT;
6140 x = XVECEXP (inner, 0, 0);
6141 return (GET_CODE (x) == SYMBOL_REF
6142 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6150 return legitimate_pic_address_disp_p (x);
6157 /* Determine if a given CONST RTX is a valid memory displacement
6161 legitimate_pic_address_disp_p (rtx disp)
6165 /* In 64bit mode we can allow direct addresses of symbols and labels
6166 when they are not dynamic symbols. */
6169 rtx op0 = disp, op1;
6171 switch (GET_CODE (disp))
6177 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6179 op0 = XEXP (XEXP (disp, 0), 0);
6180 op1 = XEXP (XEXP (disp, 0), 1);
6181 if (GET_CODE (op1) != CONST_INT
6182 || INTVAL (op1) >= 16*1024*1024
6183 || INTVAL (op1) < -16*1024*1024)
6185 if (GET_CODE (op0) == LABEL_REF)
6187 if (GET_CODE (op0) != SYMBOL_REF)
6192 /* TLS references should always be enclosed in UNSPEC. */
6193 if (SYMBOL_REF_TLS_MODEL (op0))
6195 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6203 if (GET_CODE (disp) != CONST)
6205 disp = XEXP (disp, 0);
6209 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6210 of GOT tables. We should not need these anyway. */
6211 if (GET_CODE (disp) != UNSPEC
6212 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6213 && XINT (disp, 1) != UNSPEC_GOTOFF))
6216 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6217 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6223 if (GET_CODE (disp) == PLUS)
6225 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6227 disp = XEXP (disp, 0);
6231 if (TARGET_MACHO && darwin_local_data_pic (disp))
6234 if (GET_CODE (disp) != UNSPEC)
6237 switch (XINT (disp, 1))
6242 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6244 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6245 While ABI specify also 32bit relocation but we don't produce it in
6246 small PIC model at all. */
6247 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6248 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6250 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6252 case UNSPEC_GOTTPOFF:
6253 case UNSPEC_GOTNTPOFF:
6254 case UNSPEC_INDNTPOFF:
6257 disp = XVECEXP (disp, 0, 0);
6258 return (GET_CODE (disp) == SYMBOL_REF
6259 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6261 disp = XVECEXP (disp, 0, 0);
6262 return (GET_CODE (disp) == SYMBOL_REF
6263 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6265 disp = XVECEXP (disp, 0, 0);
6266 return (GET_CODE (disp) == SYMBOL_REF
6267 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6273 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6274 memory address for an instruction. The MODE argument is the machine mode
6275 for the MEM expression that wants to use this address.
6277 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6278 convert common non-canonical forms to canonical form so that they will
6282 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6284 struct ix86_address parts;
6285 rtx base, index, disp;
6286 HOST_WIDE_INT scale;
6287 const char *reason = NULL;
6288 rtx reason_rtx = NULL_RTX;
6290 if (TARGET_DEBUG_ADDR)
6293 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6294 GET_MODE_NAME (mode), strict);
6298 if (ix86_decompose_address (addr, &parts) <= 0)
6300 reason = "decomposition failed";
6305 index = parts.index;
6307 scale = parts.scale;
6309 /* Validate base register.
6311 Don't allow SUBREG's that span more than a word here. It can lead to spill
6312 failures when the base is one word out of a two word structure, which is
6313 represented internally as a DImode int. */
6322 else if (GET_CODE (base) == SUBREG
6323 && REG_P (SUBREG_REG (base))
6324 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6326 reg = SUBREG_REG (base);
6329 reason = "base is not a register";
6333 if (GET_MODE (base) != Pmode)
6335 reason = "base is not in Pmode";
6339 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6340 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6342 reason = "base is not valid";
6347 /* Validate index register.
6349 Don't allow SUBREG's that span more than a word here -- same as above. */
6358 else if (GET_CODE (index) == SUBREG
6359 && REG_P (SUBREG_REG (index))
6360 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6362 reg = SUBREG_REG (index);
6365 reason = "index is not a register";
6369 if (GET_MODE (index) != Pmode)
6371 reason = "index is not in Pmode";
6375 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6376 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6378 reason = "index is not valid";
6383 /* Validate scale factor. */
6386 reason_rtx = GEN_INT (scale);
6389 reason = "scale without index";
6393 if (scale != 2 && scale != 4 && scale != 8)
6395 reason = "scale is not a valid multiplier";
6400 /* Validate displacement. */
6405 if (GET_CODE (disp) == CONST
6406 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6407 switch (XINT (XEXP (disp, 0), 1))
6409 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6410 used. While ABI specify also 32bit relocations, we don't produce
6411 them at all and use IP relative instead. */
6414 gcc_assert (flag_pic);
6416 goto is_legitimate_pic;
6417 reason = "64bit address unspec";
6420 case UNSPEC_GOTPCREL:
6421 gcc_assert (flag_pic);
6422 goto is_legitimate_pic;
6424 case UNSPEC_GOTTPOFF:
6425 case UNSPEC_GOTNTPOFF:
6426 case UNSPEC_INDNTPOFF:
6432 reason = "invalid address unspec";
6436 else if (SYMBOLIC_CONST (disp)
6440 && MACHOPIC_INDIRECT
6441 && !machopic_operand_p (disp)
6447 if (TARGET_64BIT && (index || base))
6449 /* foo@dtpoff(%rX) is ok. */
6450 if (GET_CODE (disp) != CONST
6451 || GET_CODE (XEXP (disp, 0)) != PLUS
6452 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6453 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6454 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6455 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6457 reason = "non-constant pic memory reference";
6461 else if (! legitimate_pic_address_disp_p (disp))
6463 reason = "displacement is an invalid pic construct";
6467 /* This code used to verify that a symbolic pic displacement
6468 includes the pic_offset_table_rtx register.
6470 While this is good idea, unfortunately these constructs may
6471 be created by "adds using lea" optimization for incorrect
6480 This code is nonsensical, but results in addressing
6481 GOT table with pic_offset_table_rtx base. We can't
6482 just refuse it easily, since it gets matched by
6483 "addsi3" pattern, that later gets split to lea in the
6484 case output register differs from input. While this
6485 can be handled by separate addsi pattern for this case
6486 that never results in lea, this seems to be easier and
6487 correct fix for crash to disable this test. */
6489 else if (GET_CODE (disp) != LABEL_REF
6490 && GET_CODE (disp) != CONST_INT
6491 && (GET_CODE (disp) != CONST
6492 || !legitimate_constant_p (disp))
6493 && (GET_CODE (disp) != SYMBOL_REF
6494 || !legitimate_constant_p (disp)))
6496 reason = "displacement is not constant";
6499 else if (TARGET_64BIT
6500 && !x86_64_immediate_operand (disp, VOIDmode))
6502 reason = "displacement is out of range";
6507 /* Everything looks valid. */
6508 if (TARGET_DEBUG_ADDR)
6509 fprintf (stderr, "Success.\n");
6513 if (TARGET_DEBUG_ADDR)
6515 fprintf (stderr, "Error: %s\n", reason);
6516 debug_rtx (reason_rtx);
6521 /* Return a unique alias set for the GOT. */
6523 static HOST_WIDE_INT
6524 ix86_GOT_alias_set (void)
6526 static HOST_WIDE_INT set = -1;
6528 set = new_alias_set ();
6532 /* Return a legitimate reference for ORIG (an address) using the
6533 register REG. If REG is 0, a new pseudo is generated.
6535 There are two types of references that must be handled:
6537 1. Global data references must load the address from the GOT, via
6538 the PIC reg. An insn is emitted to do this load, and the reg is
6541 2. Static data references, constant pool addresses, and code labels
6542 compute the address as an offset from the GOT, whose base is in
6543 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6544 differentiate them from global data objects. The returned
6545 address is the PIC reg + an unspec constant.
6547 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6548 reg also appears in the address. */
6551 legitimize_pic_address (rtx orig, rtx reg)
6558 if (TARGET_MACHO && !TARGET_64BIT)
6561 reg = gen_reg_rtx (Pmode);
6562 /* Use the generic Mach-O PIC machinery. */
6563 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6567 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6569 else if (TARGET_64BIT
6570 && ix86_cmodel != CM_SMALL_PIC
6571 && local_symbolic_operand (addr, Pmode))
6574 /* This symbol may be referenced via a displacement from the PIC
6575 base address (@GOTOFF). */
6577 if (reload_in_progress)
6578 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6579 if (GET_CODE (addr) == CONST)
6580 addr = XEXP (addr, 0);
6581 if (GET_CODE (addr) == PLUS)
6583 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6584 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6587 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6588 new = gen_rtx_CONST (Pmode, new);
6590 tmpreg = gen_reg_rtx (Pmode);
6593 emit_move_insn (tmpreg, new);
6597 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6598 tmpreg, 1, OPTAB_DIRECT);
6601 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6603 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6605 /* This symbol may be referenced via a displacement from the PIC
6606 base address (@GOTOFF). */
6608 if (reload_in_progress)
6609 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6610 if (GET_CODE (addr) == CONST)
6611 addr = XEXP (addr, 0);
6612 if (GET_CODE (addr) == PLUS)
6614 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6615 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6618 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6619 new = gen_rtx_CONST (Pmode, new);
6620 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6624 emit_move_insn (reg, new);
6628 else if (GET_CODE (addr) == SYMBOL_REF)
6632 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6633 new = gen_rtx_CONST (Pmode, new);
6634 new = gen_const_mem (Pmode, new);
6635 set_mem_alias_set (new, ix86_GOT_alias_set ());
6638 reg = gen_reg_rtx (Pmode);
6639 /* Use directly gen_movsi, otherwise the address is loaded
6640 into register for CSE. We don't want to CSE this addresses,
6641 instead we CSE addresses from the GOT table, so skip this. */
6642 emit_insn (gen_movsi (reg, new));
6647 /* This symbol must be referenced via a load from the
6648 Global Offset Table (@GOT). */
6650 if (reload_in_progress)
6651 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6652 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6653 new = gen_rtx_CONST (Pmode, new);
6654 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6655 new = gen_const_mem (Pmode, new);
6656 set_mem_alias_set (new, ix86_GOT_alias_set ());
6659 reg = gen_reg_rtx (Pmode);
6660 emit_move_insn (reg, new);
6666 if (GET_CODE (addr) == CONST_INT
6667 && !x86_64_immediate_operand (addr, VOIDmode))
6671 emit_move_insn (reg, addr);
6675 new = force_reg (Pmode, addr);
6677 else if (GET_CODE (addr) == CONST)
6679 addr = XEXP (addr, 0);
6681 /* We must match stuff we generate before. Assume the only
6682 unspecs that can get here are ours. Not that we could do
6683 anything with them anyway.... */
6684 if (GET_CODE (addr) == UNSPEC
6685 || (GET_CODE (addr) == PLUS
6686 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6688 gcc_assert (GET_CODE (addr) == PLUS);
6690 if (GET_CODE (addr) == PLUS)
6692 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6694 /* Check first to see if this is a constant offset from a @GOTOFF
6695 symbol reference. */
6696 if (local_symbolic_operand (op0, Pmode)
6697 && GET_CODE (op1) == CONST_INT)
6701 if (reload_in_progress)
6702 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6703 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6705 new = gen_rtx_PLUS (Pmode, new, op1);
6706 new = gen_rtx_CONST (Pmode, new);
6707 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6711 emit_move_insn (reg, new);
6717 if (INTVAL (op1) < -16*1024*1024
6718 || INTVAL (op1) >= 16*1024*1024)
6720 if (!x86_64_immediate_operand (op1, Pmode))
6721 op1 = force_reg (Pmode, op1);
6722 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6728 base = legitimize_pic_address (XEXP (addr, 0), reg);
6729 new = legitimize_pic_address (XEXP (addr, 1),
6730 base == reg ? NULL_RTX : reg);
6732 if (GET_CODE (new) == CONST_INT)
6733 new = plus_constant (base, INTVAL (new));
6736 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6738 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6739 new = XEXP (new, 1);
6741 new = gen_rtx_PLUS (Pmode, base, new);
6749 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6752 get_thread_pointer (int to_reg)
6756 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6760 reg = gen_reg_rtx (Pmode);
6761 insn = gen_rtx_SET (VOIDmode, reg, tp);
6762 insn = emit_insn (insn);
6767 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6768 false if we expect this to be used for a memory address and true if
6769 we expect to load the address into a register. */
6772 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6774 rtx dest, base, off, pic, tp;
6779 case TLS_MODEL_GLOBAL_DYNAMIC:
6780 dest = gen_reg_rtx (Pmode);
6781 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6783 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6785 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6788 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6789 insns = get_insns ();
6792 emit_libcall_block (insns, dest, rax, x);
6794 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6795 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6797 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6799 if (TARGET_GNU2_TLS)
6801 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6803 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6807 case TLS_MODEL_LOCAL_DYNAMIC:
6808 base = gen_reg_rtx (Pmode);
6809 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6811 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6813 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6816 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6817 insns = get_insns ();
6820 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6821 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6822 emit_libcall_block (insns, base, rax, note);
6824 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6825 emit_insn (gen_tls_local_dynamic_base_64 (base));
6827 emit_insn (gen_tls_local_dynamic_base_32 (base));
6829 if (TARGET_GNU2_TLS)
6831 rtx x = ix86_tls_module_base ();
6833 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6834 gen_rtx_MINUS (Pmode, x, tp));
6837 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6838 off = gen_rtx_CONST (Pmode, off);
6840 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6842 if (TARGET_GNU2_TLS)
6844 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6846 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6851 case TLS_MODEL_INITIAL_EXEC:
6855 type = UNSPEC_GOTNTPOFF;
6859 if (reload_in_progress)
6860 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6861 pic = pic_offset_table_rtx;
6862 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6864 else if (!TARGET_ANY_GNU_TLS)
6866 pic = gen_reg_rtx (Pmode);
6867 emit_insn (gen_set_got (pic));
6868 type = UNSPEC_GOTTPOFF;
6873 type = UNSPEC_INDNTPOFF;
6876 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6877 off = gen_rtx_CONST (Pmode, off);
6879 off = gen_rtx_PLUS (Pmode, pic, off);
6880 off = gen_const_mem (Pmode, off);
6881 set_mem_alias_set (off, ix86_GOT_alias_set ());
6883 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6885 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6886 off = force_reg (Pmode, off);
6887 return gen_rtx_PLUS (Pmode, base, off);
6891 base = get_thread_pointer (true);
6892 dest = gen_reg_rtx (Pmode);
6893 emit_insn (gen_subsi3 (dest, base, off));
6897 case TLS_MODEL_LOCAL_EXEC:
6898 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6899 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6900 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6901 off = gen_rtx_CONST (Pmode, off);
6903 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6905 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6906 return gen_rtx_PLUS (Pmode, base, off);
6910 base = get_thread_pointer (true);
6911 dest = gen_reg_rtx (Pmode);
6912 emit_insn (gen_subsi3 (dest, base, off));
6923 /* Try machine-dependent ways of modifying an illegitimate address
6924 to be legitimate. If we find one, return the new, valid address.
6925 This macro is used in only one place: `memory_address' in explow.c.
6927 OLDX is the address as it was before break_out_memory_refs was called.
6928 In some cases it is useful to look at this to decide what needs to be done.
6930 MODE and WIN are passed so that this macro can use
6931 GO_IF_LEGITIMATE_ADDRESS.
6933 It is always safe for this macro to do nothing. It exists to recognize
6934 opportunities to optimize the output.
6936 For the 80386, we handle X+REG by loading X into a register R and
6937 using R+REG. R will go in a general reg and indexing will be used.
6938 However, if REG is a broken-out memory address or multiplication,
6939 nothing needs to be done because REG can certainly go in a general reg.
6941 When -fpic is used, special handling is needed for symbolic references.
6942 See comments by legitimize_pic_address in i386.c for details. */
6945 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6950 if (TARGET_DEBUG_ADDR)
6952 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6953 GET_MODE_NAME (mode));
6957 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6959 return legitimize_tls_address (x, log, false);
6960 if (GET_CODE (x) == CONST
6961 && GET_CODE (XEXP (x, 0)) == PLUS
6962 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6963 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6965 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6966 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6969 if (flag_pic && SYMBOLIC_CONST (x))
6970 return legitimize_pic_address (x, 0);
6972 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6973 if (GET_CODE (x) == ASHIFT
6974 && GET_CODE (XEXP (x, 1)) == CONST_INT
6975 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6978 log = INTVAL (XEXP (x, 1));
6979 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6980 GEN_INT (1 << log));
6983 if (GET_CODE (x) == PLUS)
6985 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6987 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6988 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6989 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6992 log = INTVAL (XEXP (XEXP (x, 0), 1));
6993 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6994 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6995 GEN_INT (1 << log));
6998 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6999 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7000 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7003 log = INTVAL (XEXP (XEXP (x, 1), 1));
7004 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7005 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7006 GEN_INT (1 << log));
7009 /* Put multiply first if it isn't already. */
7010 if (GET_CODE (XEXP (x, 1)) == MULT)
7012 rtx tmp = XEXP (x, 0);
7013 XEXP (x, 0) = XEXP (x, 1);
7018 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7019 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7020 created by virtual register instantiation, register elimination, and
7021 similar optimizations. */
7022 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7025 x = gen_rtx_PLUS (Pmode,
7026 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7027 XEXP (XEXP (x, 1), 0)),
7028 XEXP (XEXP (x, 1), 1));
7032 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7033 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7034 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7035 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7036 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7037 && CONSTANT_P (XEXP (x, 1)))
7040 rtx other = NULL_RTX;
7042 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7044 constant = XEXP (x, 1);
7045 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7047 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7049 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7050 other = XEXP (x, 1);
7058 x = gen_rtx_PLUS (Pmode,
7059 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7060 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7061 plus_constant (other, INTVAL (constant)));
7065 if (changed && legitimate_address_p (mode, x, FALSE))
7068 if (GET_CODE (XEXP (x, 0)) == MULT)
7071 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7074 if (GET_CODE (XEXP (x, 1)) == MULT)
7077 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7081 && GET_CODE (XEXP (x, 1)) == REG
7082 && GET_CODE (XEXP (x, 0)) == REG)
7085 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7088 x = legitimize_pic_address (x, 0);
7091 if (changed && legitimate_address_p (mode, x, FALSE))
7094 if (GET_CODE (XEXP (x, 0)) == REG)
7096 rtx temp = gen_reg_rtx (Pmode);
7097 rtx val = force_operand (XEXP (x, 1), temp);
7099 emit_move_insn (temp, val);
7105 else if (GET_CODE (XEXP (x, 1)) == REG)
7107 rtx temp = gen_reg_rtx (Pmode);
7108 rtx val = force_operand (XEXP (x, 0), temp);
7110 emit_move_insn (temp, val);
7120 /* Print an integer constant expression in assembler syntax. Addition
7121 and subtraction are the only arithmetic that may appear in these
7122 expressions. FILE is the stdio stream to write to, X is the rtx, and
7123 CODE is the operand print code from the output string. */
7126 output_pic_addr_const (FILE *file, rtx x, int code)
7130 switch (GET_CODE (x))
7133 gcc_assert (flag_pic);
7138 output_addr_const (file, x);
7139 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7140 fputs ("@PLT", file);
7147 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7148 assemble_name (asm_out_file, buf);
7152 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7156 /* This used to output parentheses around the expression,
7157 but that does not work on the 386 (either ATT or BSD assembler). */
7158 output_pic_addr_const (file, XEXP (x, 0), code);
7162 if (GET_MODE (x) == VOIDmode)
7164 /* We can use %d if the number is <32 bits and positive. */
7165 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7166 fprintf (file, "0x%lx%08lx",
7167 (unsigned long) CONST_DOUBLE_HIGH (x),
7168 (unsigned long) CONST_DOUBLE_LOW (x));
7170 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7173 /* We can't handle floating point constants;
7174 PRINT_OPERAND must handle them. */
7175 output_operand_lossage ("floating constant misused");
7179 /* Some assemblers need integer constants to appear first. */
7180 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7182 output_pic_addr_const (file, XEXP (x, 0), code);
7184 output_pic_addr_const (file, XEXP (x, 1), code);
7188 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7189 output_pic_addr_const (file, XEXP (x, 1), code);
7191 output_pic_addr_const (file, XEXP (x, 0), code);
7197 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7198 output_pic_addr_const (file, XEXP (x, 0), code);
7200 output_pic_addr_const (file, XEXP (x, 1), code);
7202 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7206 gcc_assert (XVECLEN (x, 0) == 1);
7207 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7208 switch (XINT (x, 1))
7211 fputs ("@GOT", file);
7214 fputs ("@GOTOFF", file);
7216 case UNSPEC_GOTPCREL:
7217 fputs ("@GOTPCREL(%rip)", file);
7219 case UNSPEC_GOTTPOFF:
7220 /* FIXME: This might be @TPOFF in Sun ld too. */
7221 fputs ("@GOTTPOFF", file);
7224 fputs ("@TPOFF", file);
7228 fputs ("@TPOFF", file);
7230 fputs ("@NTPOFF", file);
7233 fputs ("@DTPOFF", file);
7235 case UNSPEC_GOTNTPOFF:
7237 fputs ("@GOTTPOFF(%rip)", file);
7239 fputs ("@GOTNTPOFF", file);
7241 case UNSPEC_INDNTPOFF:
7242 fputs ("@INDNTPOFF", file);
7245 output_operand_lossage ("invalid UNSPEC as operand");
7251 output_operand_lossage ("invalid expression as operand");
7255 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7256 We need to emit DTP-relative relocations. */
7259 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7261 fputs (ASM_LONG, file);
7262 output_addr_const (file, x);
7263 fputs ("@DTPOFF", file);
7269 fputs (", 0", file);
7276 /* In the name of slightly smaller debug output, and to cater to
7277 general assembler lossage, recognize PIC+GOTOFF and turn it back
7278 into a direct symbol reference.
7280 On Darwin, this is necessary to avoid a crash, because Darwin
7281 has a different PIC label for each routine but the DWARF debugging
7282 information is not associated with any particular routine, so it's
7283 necessary to remove references to the PIC label from RTL stored by
7284 the DWARF output code. */
7287 ix86_delegitimize_address (rtx orig_x)
7290 /* reg_addend is NULL or a multiple of some register. */
7291 rtx reg_addend = NULL_RTX;
7292 /* const_addend is NULL or a const_int. */
7293 rtx const_addend = NULL_RTX;
7294 /* This is the result, or NULL. */
7295 rtx result = NULL_RTX;
7297 if (GET_CODE (x) == MEM)
7302 if (GET_CODE (x) != CONST
7303 || GET_CODE (XEXP (x, 0)) != UNSPEC
7304 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7305 || GET_CODE (orig_x) != MEM)
7307 return XVECEXP (XEXP (x, 0), 0, 0);
7310 if (GET_CODE (x) != PLUS
7311 || GET_CODE (XEXP (x, 1)) != CONST)
7314 if (GET_CODE (XEXP (x, 0)) == REG
7315 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7316 /* %ebx + GOT/GOTOFF */
7318 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7320 /* %ebx + %reg * scale + GOT/GOTOFF */
7321 reg_addend = XEXP (x, 0);
7322 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7323 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7324 reg_addend = XEXP (reg_addend, 1);
7325 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7326 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7327 reg_addend = XEXP (reg_addend, 0);
7330 if (GET_CODE (reg_addend) != REG
7331 && GET_CODE (reg_addend) != MULT
7332 && GET_CODE (reg_addend) != ASHIFT)
7338 x = XEXP (XEXP (x, 1), 0);
7339 if (GET_CODE (x) == PLUS
7340 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7342 const_addend = XEXP (x, 1);
7346 if (GET_CODE (x) == UNSPEC
7347 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7348 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7349 result = XVECEXP (x, 0, 0);
7351 if (TARGET_MACHO && darwin_local_data_pic (x)
7352 && GET_CODE (orig_x) != MEM)
7353 result = XEXP (x, 0);
7359 result = gen_rtx_PLUS (Pmode, result, const_addend);
7361 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7366 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7371 if (mode == CCFPmode || mode == CCFPUmode)
7373 enum rtx_code second_code, bypass_code;
7374 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7375 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7376 code = ix86_fp_compare_code_to_integer (code);
7380 code = reverse_condition (code);
7391 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7395 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7396 Those same assemblers have the same but opposite lossage on cmov. */
7397 gcc_assert (mode == CCmode);
7398 suffix = fp ? "nbe" : "a";
7418 gcc_assert (mode == CCmode);
7440 gcc_assert (mode == CCmode);
7441 suffix = fp ? "nb" : "ae";
7444 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7448 gcc_assert (mode == CCmode);
7452 suffix = fp ? "u" : "p";
7455 suffix = fp ? "nu" : "np";
7460 fputs (suffix, file);
7463 /* Print the name of register X to FILE based on its machine mode and number.
7464 If CODE is 'w', pretend the mode is HImode.
7465 If CODE is 'b', pretend the mode is QImode.
7466 If CODE is 'k', pretend the mode is SImode.
7467 If CODE is 'q', pretend the mode is DImode.
7468 If CODE is 'h', pretend the reg is the 'high' byte register.
7469 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7472 print_reg (rtx x, int code, FILE *file)
7474 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7475 && REGNO (x) != FRAME_POINTER_REGNUM
7476 && REGNO (x) != FLAGS_REG
7477 && REGNO (x) != FPSR_REG);
7479 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7482 if (code == 'w' || MMX_REG_P (x))
7484 else if (code == 'b')
7486 else if (code == 'k')
7488 else if (code == 'q')
7490 else if (code == 'y')
7492 else if (code == 'h')
7495 code = GET_MODE_SIZE (GET_MODE (x));
7497 /* Irritatingly, AMD extended registers use different naming convention
7498 from the normal registers. */
7499 if (REX_INT_REG_P (x))
7501 gcc_assert (TARGET_64BIT);
7505 error ("extended registers have no high halves");
7508 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7511 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7514 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7517 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7520 error ("unsupported operand size for extended register");
7528 if (STACK_TOP_P (x))
7530 fputs ("st(0)", file);
7537 if (! ANY_FP_REG_P (x))
7538 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7543 fputs (hi_reg_name[REGNO (x)], file);
7546 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7548 fputs (qi_reg_name[REGNO (x)], file);
7551 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7553 fputs (qi_high_reg_name[REGNO (x)], file);
7560 /* Locate some local-dynamic symbol still in use by this function
7561 so that we can print its name in some tls_local_dynamic_base
7565 get_some_local_dynamic_name (void)
7569 if (cfun->machine->some_ld_name)
7570 return cfun->machine->some_ld_name;
7572 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7574 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7575 return cfun->machine->some_ld_name;
7581 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7585 if (GET_CODE (x) == SYMBOL_REF
7586 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7588 cfun->machine->some_ld_name = XSTR (x, 0);
7596 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7597 C -- print opcode suffix for set/cmov insn.
7598 c -- like C, but print reversed condition
7599 F,f -- likewise, but for floating-point.
7600 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7602 R -- print the prefix for register names.
7603 z -- print the opcode suffix for the size of the current operand.
7604 * -- print a star (in certain assembler syntax)
7605 A -- print an absolute memory reference.
7606 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7607 s -- print a shift double count, followed by the assemblers argument
7609 b -- print the QImode name of the register for the indicated operand.
7610 %b0 would print %al if operands[0] is reg 0.
7611 w -- likewise, print the HImode name of the register.
7612 k -- likewise, print the SImode name of the register.
7613 q -- likewise, print the DImode name of the register.
7614 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7615 y -- print "st(0)" instead of "st" as a register.
7616 D -- print condition for SSE cmp instruction.
7617 P -- if PIC, print an @PLT suffix.
7618 X -- don't print any sort of PIC '@' suffix for a symbol.
7619 & -- print some in-use local-dynamic symbol name.
7620 H -- print a memory address offset by 8; used for sse high-parts
7624 print_operand (FILE *file, rtx x, int code)
7631 if (ASSEMBLER_DIALECT == ASM_ATT)
7636 assemble_name (file, get_some_local_dynamic_name ());
7640 switch (ASSEMBLER_DIALECT)
7647 /* Intel syntax. For absolute addresses, registers should not
7648 be surrounded by braces. */
7649 if (GET_CODE (x) != REG)
7652 PRINT_OPERAND (file, x, 0);
7662 PRINT_OPERAND (file, x, 0);
7667 if (ASSEMBLER_DIALECT == ASM_ATT)
7672 if (ASSEMBLER_DIALECT == ASM_ATT)
7677 if (ASSEMBLER_DIALECT == ASM_ATT)
7682 if (ASSEMBLER_DIALECT == ASM_ATT)
7687 if (ASSEMBLER_DIALECT == ASM_ATT)
7692 if (ASSEMBLER_DIALECT == ASM_ATT)
7697 /* 387 opcodes don't get size suffixes if the operands are
7699 if (STACK_REG_P (x))
7702 /* Likewise if using Intel opcodes. */
7703 if (ASSEMBLER_DIALECT == ASM_INTEL)
7706 /* This is the size of op from size of operand. */
7707 switch (GET_MODE_SIZE (GET_MODE (x)))
7710 #ifdef HAVE_GAS_FILDS_FISTS
7716 if (GET_MODE (x) == SFmode)
7731 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7733 #ifdef GAS_MNEMONICS
7759 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7761 PRINT_OPERAND (file, x, 0);
7767 /* Little bit of braindamage here. The SSE compare instructions
7768 does use completely different names for the comparisons that the
7769 fp conditional moves. */
7770 switch (GET_CODE (x))
7785 fputs ("unord", file);
7789 fputs ("neq", file);
7793 fputs ("nlt", file);
7797 fputs ("nle", file);
7800 fputs ("ord", file);
7807 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7808 if (ASSEMBLER_DIALECT == ASM_ATT)
7810 switch (GET_MODE (x))
7812 case HImode: putc ('w', file); break;
7814 case SFmode: putc ('l', file); break;
7816 case DFmode: putc ('q', file); break;
7817 default: gcc_unreachable ();
7824 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7827 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7828 if (ASSEMBLER_DIALECT == ASM_ATT)
7831 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7834 /* Like above, but reverse condition */
7836 /* Check to see if argument to %c is really a constant
7837 and not a condition code which needs to be reversed. */
7838 if (!COMPARISON_P (x))
7840 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7843 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7846 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7847 if (ASSEMBLER_DIALECT == ASM_ATT)
7850 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7854 /* It doesn't actually matter what mode we use here, as we're
7855 only going to use this for printing. */
7856 x = adjust_address_nv (x, DImode, 8);
7863 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7866 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7869 int pred_val = INTVAL (XEXP (x, 0));
7871 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7872 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7874 int taken = pred_val > REG_BR_PROB_BASE / 2;
7875 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7877 /* Emit hints only in the case default branch prediction
7878 heuristics would fail. */
7879 if (taken != cputaken)
7881 /* We use 3e (DS) prefix for taken branches and
7882 2e (CS) prefix for not taken branches. */
7884 fputs ("ds ; ", file);
7886 fputs ("cs ; ", file);
7893 output_operand_lossage ("invalid operand code '%c'", code);
7897 if (GET_CODE (x) == REG)
7898 print_reg (x, code, file);
7900 else if (GET_CODE (x) == MEM)
7902 /* No `byte ptr' prefix for call instructions. */
7903 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7906 switch (GET_MODE_SIZE (GET_MODE (x)))
7908 case 1: size = "BYTE"; break;
7909 case 2: size = "WORD"; break;
7910 case 4: size = "DWORD"; break;
7911 case 8: size = "QWORD"; break;
7912 case 12: size = "XWORD"; break;
7913 case 16: size = "XMMWORD"; break;
7918 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7921 else if (code == 'w')
7923 else if (code == 'k')
7927 fputs (" PTR ", file);
7931 /* Avoid (%rip) for call operands. */
7932 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7933 && GET_CODE (x) != CONST_INT)
7934 output_addr_const (file, x);
7935 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7936 output_operand_lossage ("invalid constraints for operand");
7941 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7946 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7947 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7949 if (ASSEMBLER_DIALECT == ASM_ATT)
7951 fprintf (file, "0x%08lx", l);
7954 /* These float cases don't actually occur as immediate operands. */
7955 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7959 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7960 fprintf (file, "%s", dstr);
7963 else if (GET_CODE (x) == CONST_DOUBLE
7964 && GET_MODE (x) == XFmode)
7968 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7969 fprintf (file, "%s", dstr);
7974 /* We have patterns that allow zero sets of memory, for instance.
7975 In 64-bit mode, we should probably support all 8-byte vectors,
7976 since we can in fact encode that into an immediate. */
7977 if (GET_CODE (x) == CONST_VECTOR)
7979 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7985 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7987 if (ASSEMBLER_DIALECT == ASM_ATT)
7990 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7991 || GET_CODE (x) == LABEL_REF)
7993 if (ASSEMBLER_DIALECT == ASM_ATT)
7996 fputs ("OFFSET FLAT:", file);
7999 if (GET_CODE (x) == CONST_INT)
8000 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8002 output_pic_addr_const (file, x, code);
8004 output_addr_const (file, x);
8008 /* Print a memory operand whose address is ADDR. */
8011 print_operand_address (FILE *file, rtx addr)
8013 struct ix86_address parts;
8014 rtx base, index, disp;
8016 int ok = ix86_decompose_address (addr, &parts);
8021 index = parts.index;
8023 scale = parts.scale;
8031 if (USER_LABEL_PREFIX[0] == 0)
8033 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8039 if (!base && !index)
8041 /* Displacement only requires special attention. */
8043 if (GET_CODE (disp) == CONST_INT)
8045 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8047 if (USER_LABEL_PREFIX[0] == 0)
8049 fputs ("ds:", file);
8051 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8054 output_pic_addr_const (file, disp, 0);
8056 output_addr_const (file, disp);
8058 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8061 if (GET_CODE (disp) == CONST
8062 && GET_CODE (XEXP (disp, 0)) == PLUS
8063 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8064 disp = XEXP (XEXP (disp, 0), 0);
8065 if (GET_CODE (disp) == LABEL_REF
8066 || (GET_CODE (disp) == SYMBOL_REF
8067 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8068 fputs ("(%rip)", file);
8073 if (ASSEMBLER_DIALECT == ASM_ATT)
8078 output_pic_addr_const (file, disp, 0);
8079 else if (GET_CODE (disp) == LABEL_REF)
8080 output_asm_label (disp);
8082 output_addr_const (file, disp);
8087 print_reg (base, 0, file);
8091 print_reg (index, 0, file);
8093 fprintf (file, ",%d", scale);
8099 rtx offset = NULL_RTX;
8103 /* Pull out the offset of a symbol; print any symbol itself. */
8104 if (GET_CODE (disp) == CONST
8105 && GET_CODE (XEXP (disp, 0)) == PLUS
8106 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8108 offset = XEXP (XEXP (disp, 0), 1);
8109 disp = gen_rtx_CONST (VOIDmode,
8110 XEXP (XEXP (disp, 0), 0));
8114 output_pic_addr_const (file, disp, 0);
8115 else if (GET_CODE (disp) == LABEL_REF)
8116 output_asm_label (disp);
8117 else if (GET_CODE (disp) == CONST_INT)
8120 output_addr_const (file, disp);
8126 print_reg (base, 0, file);
8129 if (INTVAL (offset) >= 0)
8131 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8135 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8142 print_reg (index, 0, file);
8144 fprintf (file, "*%d", scale);
8152 output_addr_const_extra (FILE *file, rtx x)
8156 if (GET_CODE (x) != UNSPEC)
8159 op = XVECEXP (x, 0, 0);
8160 switch (XINT (x, 1))
8162 case UNSPEC_GOTTPOFF:
8163 output_addr_const (file, op);
8164 /* FIXME: This might be @TPOFF in Sun ld. */
8165 fputs ("@GOTTPOFF", file);
8168 output_addr_const (file, op);
8169 fputs ("@TPOFF", file);
8172 output_addr_const (file, op);
8174 fputs ("@TPOFF", file);
8176 fputs ("@NTPOFF", file);
8179 output_addr_const (file, op);
8180 fputs ("@DTPOFF", file);
8182 case UNSPEC_GOTNTPOFF:
8183 output_addr_const (file, op);
8185 fputs ("@GOTTPOFF(%rip)", file);
8187 fputs ("@GOTNTPOFF", file);
8189 case UNSPEC_INDNTPOFF:
8190 output_addr_const (file, op);
8191 fputs ("@INDNTPOFF", file);
8201 /* Split one or more DImode RTL references into pairs of SImode
8202 references. The RTL can be REG, offsettable MEM, integer constant, or
8203 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8204 split and "num" is its length. lo_half and hi_half are output arrays
8205 that parallel "operands". */
8208 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8212 rtx op = operands[num];
8214 /* simplify_subreg refuse to split volatile memory addresses,
8215 but we still have to handle it. */
8216 if (GET_CODE (op) == MEM)
8218 lo_half[num] = adjust_address (op, SImode, 0);
8219 hi_half[num] = adjust_address (op, SImode, 4);
8223 lo_half[num] = simplify_gen_subreg (SImode, op,
8224 GET_MODE (op) == VOIDmode
8225 ? DImode : GET_MODE (op), 0);
8226 hi_half[num] = simplify_gen_subreg (SImode, op,
8227 GET_MODE (op) == VOIDmode
8228 ? DImode : GET_MODE (op), 4);
8232 /* Split one or more TImode RTL references into pairs of DImode
8233 references. The RTL can be REG, offsettable MEM, integer constant, or
8234 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8235 split and "num" is its length. lo_half and hi_half are output arrays
8236 that parallel "operands". */
8239 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8243 rtx op = operands[num];
8245 /* simplify_subreg refuse to split volatile memory addresses, but we
8246 still have to handle it. */
8247 if (GET_CODE (op) == MEM)
8249 lo_half[num] = adjust_address (op, DImode, 0);
8250 hi_half[num] = adjust_address (op, DImode, 8);
8254 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8255 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8260 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8261 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8262 is the expression of the binary operation. The output may either be
8263 emitted here, or returned to the caller, like all output_* functions.
8265 There is no guarantee that the operands are the same mode, as they
8266 might be within FLOAT or FLOAT_EXTEND expressions. */
8268 #ifndef SYSV386_COMPAT
8269 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8270 wants to fix the assemblers because that causes incompatibility
8271 with gcc. No-one wants to fix gcc because that causes
8272 incompatibility with assemblers... You can use the option of
8273 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8274 #define SYSV386_COMPAT 1
8278 output_387_binary_op (rtx insn, rtx *operands)
8280 static char buf[30];
8283 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8285 #ifdef ENABLE_CHECKING
8286 /* Even if we do not want to check the inputs, this documents input
8287 constraints. Which helps in understanding the following code. */
8288 if (STACK_REG_P (operands[0])
8289 && ((REG_P (operands[1])
8290 && REGNO (operands[0]) == REGNO (operands[1])
8291 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8292 || (REG_P (operands[2])
8293 && REGNO (operands[0]) == REGNO (operands[2])
8294 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8295 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8298 gcc_assert (is_sse);
8301 switch (GET_CODE (operands[3]))
8304 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8305 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8313 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8314 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8322 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8323 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8331 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8332 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8346 if (GET_MODE (operands[0]) == SFmode)
8347 strcat (buf, "ss\t{%2, %0|%0, %2}");
8349 strcat (buf, "sd\t{%2, %0|%0, %2}");
8354 switch (GET_CODE (operands[3]))
8358 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8360 rtx temp = operands[2];
8361 operands[2] = operands[1];
8365 /* know operands[0] == operands[1]. */
8367 if (GET_CODE (operands[2]) == MEM)
8373 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8375 if (STACK_TOP_P (operands[0]))
8376 /* How is it that we are storing to a dead operand[2]?
8377 Well, presumably operands[1] is dead too. We can't
8378 store the result to st(0) as st(0) gets popped on this
8379 instruction. Instead store to operands[2] (which I
8380 think has to be st(1)). st(1) will be popped later.
8381 gcc <= 2.8.1 didn't have this check and generated
8382 assembly code that the Unixware assembler rejected. */
8383 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8385 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8389 if (STACK_TOP_P (operands[0]))
8390 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8392 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8397 if (GET_CODE (operands[1]) == MEM)
8403 if (GET_CODE (operands[2]) == MEM)
8409 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8412 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8413 derived assemblers, confusingly reverse the direction of
8414 the operation for fsub{r} and fdiv{r} when the
8415 destination register is not st(0). The Intel assembler
8416 doesn't have this brain damage. Read !SYSV386_COMPAT to
8417 figure out what the hardware really does. */
8418 if (STACK_TOP_P (operands[0]))
8419 p = "{p\t%0, %2|rp\t%2, %0}";
8421 p = "{rp\t%2, %0|p\t%0, %2}";
8423 if (STACK_TOP_P (operands[0]))
8424 /* As above for fmul/fadd, we can't store to st(0). */
8425 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8427 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8432 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8435 if (STACK_TOP_P (operands[0]))
8436 p = "{rp\t%0, %1|p\t%1, %0}";
8438 p = "{p\t%1, %0|rp\t%0, %1}";
8440 if (STACK_TOP_P (operands[0]))
8441 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8443 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8448 if (STACK_TOP_P (operands[0]))
8450 if (STACK_TOP_P (operands[1]))
8451 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8453 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8456 else if (STACK_TOP_P (operands[1]))
8459 p = "{\t%1, %0|r\t%0, %1}";
8461 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8467 p = "{r\t%2, %0|\t%0, %2}";
8469 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8482 /* Return needed mode for entity in optimize_mode_switching pass. */
8485 ix86_mode_needed (int entity, rtx insn)
8487 enum attr_i387_cw mode;
8489 /* The mode UNINITIALIZED is used to store control word after a
8490 function call or ASM pattern. The mode ANY specify that function
8491 has no requirements on the control word and make no changes in the
8492 bits we are interested in. */
8495 || (NONJUMP_INSN_P (insn)
8496 && (asm_noperands (PATTERN (insn)) >= 0
8497 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8498 return I387_CW_UNINITIALIZED;
8500 if (recog_memoized (insn) < 0)
8503 mode = get_attr_i387_cw (insn);
8508 if (mode == I387_CW_TRUNC)
8513 if (mode == I387_CW_FLOOR)
8518 if (mode == I387_CW_CEIL)
8523 if (mode == I387_CW_MASK_PM)
8534 /* Output code to initialize control word copies used by trunc?f?i and
8535 rounding patterns. CURRENT_MODE is set to current control word,
8536 while NEW_MODE is set to new control word. */
8539 emit_i387_cw_initialization (int mode)
8541 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8546 rtx reg = gen_reg_rtx (HImode);
8548 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8549 emit_move_insn (reg, stored_mode);
8551 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8556 /* round toward zero (truncate) */
8557 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8558 slot = SLOT_CW_TRUNC;
8562 /* round down toward -oo */
8563 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8564 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8565 slot = SLOT_CW_FLOOR;
8569 /* round up toward +oo */
8570 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8571 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8572 slot = SLOT_CW_CEIL;
8575 case I387_CW_MASK_PM:
8576 /* mask precision exception for nearbyint() */
8577 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8578 slot = SLOT_CW_MASK_PM;
8590 /* round toward zero (truncate) */
8591 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8592 slot = SLOT_CW_TRUNC;
8596 /* round down toward -oo */
8597 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8598 slot = SLOT_CW_FLOOR;
8602 /* round up toward +oo */
8603 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8604 slot = SLOT_CW_CEIL;
8607 case I387_CW_MASK_PM:
8608 /* mask precision exception for nearbyint() */
8609 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8610 slot = SLOT_CW_MASK_PM;
8618 gcc_assert (slot < MAX_386_STACK_LOCALS);
8620 new_mode = assign_386_stack_local (HImode, slot);
8621 emit_move_insn (new_mode, reg);
8624 /* Output code for INSN to convert a float to a signed int. OPERANDS
8625 are the insn operands. The output may be [HSD]Imode and the input
8626 operand may be [SDX]Fmode. */
8629 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8631 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8632 int dimode_p = GET_MODE (operands[0]) == DImode;
8633 int round_mode = get_attr_i387_cw (insn);
8635 /* Jump through a hoop or two for DImode, since the hardware has no
8636 non-popping instruction. We used to do this a different way, but
8637 that was somewhat fragile and broke with post-reload splitters. */
8638 if ((dimode_p || fisttp) && !stack_top_dies)
8639 output_asm_insn ("fld\t%y1", operands);
8641 gcc_assert (STACK_TOP_P (operands[1]));
8642 gcc_assert (GET_CODE (operands[0]) == MEM);
8645 output_asm_insn ("fisttp%z0\t%0", operands);
8648 if (round_mode != I387_CW_ANY)
8649 output_asm_insn ("fldcw\t%3", operands);
8650 if (stack_top_dies || dimode_p)
8651 output_asm_insn ("fistp%z0\t%0", operands);
8653 output_asm_insn ("fist%z0\t%0", operands);
8654 if (round_mode != I387_CW_ANY)
8655 output_asm_insn ("fldcw\t%2", operands);
8661 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8662 have the values zero or one, indicates the ffreep insn's operand
8663 from the OPERANDS array. */
8666 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8668 if (TARGET_USE_FFREEP)
8669 #if HAVE_AS_IX86_FFREEP
8670 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8672 switch (REGNO (operands[opno]))
8674 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8675 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8676 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8677 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8678 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8679 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8680 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8681 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8685 return opno ? "fstp\t%y1" : "fstp\t%y0";
8689 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8690 should be used. UNORDERED_P is true when fucom should be used. */
8693 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8696 rtx cmp_op0, cmp_op1;
8697 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8701 cmp_op0 = operands[0];
8702 cmp_op1 = operands[1];
8706 cmp_op0 = operands[1];
8707 cmp_op1 = operands[2];
8712 if (GET_MODE (operands[0]) == SFmode)
8714 return "ucomiss\t{%1, %0|%0, %1}";
8716 return "comiss\t{%1, %0|%0, %1}";
8719 return "ucomisd\t{%1, %0|%0, %1}";
8721 return "comisd\t{%1, %0|%0, %1}";
8724 gcc_assert (STACK_TOP_P (cmp_op0));
8726 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8728 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8732 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8733 return output_387_ffreep (operands, 1);
8736 return "ftst\n\tfnstsw\t%0";
8739 if (STACK_REG_P (cmp_op1)
8741 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8742 && REGNO (cmp_op1) != FIRST_STACK_REG)
8744 /* If both the top of the 387 stack dies, and the other operand
8745 is also a stack register that dies, then this must be a
8746 `fcompp' float compare */
8750 /* There is no double popping fcomi variant. Fortunately,
8751 eflags is immune from the fstp's cc clobbering. */
8753 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8755 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8756 return output_387_ffreep (operands, 0);
8761 return "fucompp\n\tfnstsw\t%0";
8763 return "fcompp\n\tfnstsw\t%0";
8768 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8770 static const char * const alt[16] =
8772 "fcom%z2\t%y2\n\tfnstsw\t%0",
8773 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8774 "fucom%z2\t%y2\n\tfnstsw\t%0",
8775 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8777 "ficom%z2\t%y2\n\tfnstsw\t%0",
8778 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8782 "fcomi\t{%y1, %0|%0, %y1}",
8783 "fcomip\t{%y1, %0|%0, %y1}",
8784 "fucomi\t{%y1, %0|%0, %y1}",
8785 "fucomip\t{%y1, %0|%0, %y1}",
8796 mask = eflags_p << 3;
8797 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8798 mask |= unordered_p << 1;
8799 mask |= stack_top_dies;
8801 gcc_assert (mask < 16);
8810 ix86_output_addr_vec_elt (FILE *file, int value)
8812 const char *directive = ASM_LONG;
8816 directive = ASM_QUAD;
8818 gcc_assert (!TARGET_64BIT);
8821 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8825 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8828 fprintf (file, "%s%s%d-%s%d\n",
8829 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8830 else if (HAVE_AS_GOTOFF_IN_DATA)
8831 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8833 else if (TARGET_MACHO)
8835 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8836 machopic_output_function_base_name (file);
8837 fprintf(file, "\n");
8841 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8842 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8845 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8849 ix86_expand_clear (rtx dest)
8853 /* We play register width games, which are only valid after reload. */
8854 gcc_assert (reload_completed);
8856 /* Avoid HImode and its attendant prefix byte. */
8857 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8858 dest = gen_rtx_REG (SImode, REGNO (dest));
8860 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8862 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8863 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8865 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8866 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8872 /* X is an unchanging MEM. If it is a constant pool reference, return
8873 the constant pool rtx, else NULL. */
8876 maybe_get_pool_constant (rtx x)
8878 x = ix86_delegitimize_address (XEXP (x, 0));
8880 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8881 return get_pool_constant (x);
8887 ix86_expand_move (enum machine_mode mode, rtx operands[])
8889 int strict = (reload_in_progress || reload_completed);
8891 enum tls_model model;
8896 if (GET_CODE (op1) == SYMBOL_REF)
8898 model = SYMBOL_REF_TLS_MODEL (op1);
8901 op1 = legitimize_tls_address (op1, model, true);
8902 op1 = force_operand (op1, op0);
8907 else if (GET_CODE (op1) == CONST
8908 && GET_CODE (XEXP (op1, 0)) == PLUS
8909 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8911 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8914 rtx addend = XEXP (XEXP (op1, 0), 1);
8915 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8916 op1 = force_operand (op1, NULL);
8917 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8918 op0, 1, OPTAB_DIRECT);
8924 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8926 if (TARGET_MACHO && !TARGET_64BIT)
8931 rtx temp = ((reload_in_progress
8932 || ((op0 && GET_CODE (op0) == REG)
8934 ? op0 : gen_reg_rtx (Pmode));
8935 op1 = machopic_indirect_data_reference (op1, temp);
8936 op1 = machopic_legitimize_pic_address (op1, mode,
8937 temp == op1 ? 0 : temp);
8939 else if (MACHOPIC_INDIRECT)
8940 op1 = machopic_indirect_data_reference (op1, 0);
8947 if (GET_CODE (op0) == MEM)
8948 op1 = force_reg (Pmode, op1);
8950 op1 = legitimize_address (op1, op1, Pmode);
8955 if (GET_CODE (op0) == MEM
8956 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8957 || !push_operand (op0, mode))
8958 && GET_CODE (op1) == MEM)
8959 op1 = force_reg (mode, op1);
8961 if (push_operand (op0, mode)
8962 && ! general_no_elim_operand (op1, mode))
8963 op1 = copy_to_mode_reg (mode, op1);
8965 /* Force large constants in 64bit compilation into register
8966 to get them CSEed. */
8967 if (TARGET_64BIT && mode == DImode
8968 && immediate_operand (op1, mode)
8969 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8970 && !register_operand (op0, mode)
8971 && optimize && !reload_completed && !reload_in_progress)
8972 op1 = copy_to_mode_reg (mode, op1);
8974 if (FLOAT_MODE_P (mode))
8976 /* If we are loading a floating point constant to a register,
8977 force the value to memory now, since we'll get better code
8978 out the back end. */
8982 else if (GET_CODE (op1) == CONST_DOUBLE)
8984 op1 = validize_mem (force_const_mem (mode, op1));
8985 if (!register_operand (op0, mode))
8987 rtx temp = gen_reg_rtx (mode);
8988 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8989 emit_move_insn (op0, temp);
8996 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9000 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9002 rtx op0 = operands[0], op1 = operands[1];
9004 /* Force constants other than zero into memory. We do not know how
9005 the instructions used to build constants modify the upper 64 bits
9006 of the register, once we have that information we may be able
9007 to handle some of them more efficiently. */
9008 if ((reload_in_progress | reload_completed) == 0
9009 && register_operand (op0, mode)
9011 && standard_sse_constant_p (op1) <= 0)
9012 op1 = validize_mem (force_const_mem (mode, op1));
9014 /* Make operand1 a register if it isn't already. */
9016 && !register_operand (op0, mode)
9017 && !register_operand (op1, mode))
9019 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9023 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9026 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9027 straight to ix86_expand_vector_move. */
9030 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9039 /* If we're optimizing for size, movups is the smallest. */
9042 op0 = gen_lowpart (V4SFmode, op0);
9043 op1 = gen_lowpart (V4SFmode, op1);
9044 emit_insn (gen_sse_movups (op0, op1));
9048 /* ??? If we have typed data, then it would appear that using
9049 movdqu is the only way to get unaligned data loaded with
9051 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9053 op0 = gen_lowpart (V16QImode, op0);
9054 op1 = gen_lowpart (V16QImode, op1);
9055 emit_insn (gen_sse2_movdqu (op0, op1));
9059 if (TARGET_SSE2 && mode == V2DFmode)
9063 /* When SSE registers are split into halves, we can avoid
9064 writing to the top half twice. */
9065 if (TARGET_SSE_SPLIT_REGS)
9067 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9072 /* ??? Not sure about the best option for the Intel chips.
9073 The following would seem to satisfy; the register is
9074 entirely cleared, breaking the dependency chain. We
9075 then store to the upper half, with a dependency depth
9076 of one. A rumor has it that Intel recommends two movsd
9077 followed by an unpacklpd, but this is unconfirmed. And
9078 given that the dependency depth of the unpacklpd would
9079 still be one, I'm not sure why this would be better. */
9080 zero = CONST0_RTX (V2DFmode);
9083 m = adjust_address (op1, DFmode, 0);
9084 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9085 m = adjust_address (op1, DFmode, 8);
9086 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9090 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9091 emit_move_insn (op0, CONST0_RTX (mode));
9093 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9095 if (mode != V4SFmode)
9096 op0 = gen_lowpart (V4SFmode, op0);
9097 m = adjust_address (op1, V2SFmode, 0);
9098 emit_insn (gen_sse_loadlps (op0, op0, m));
9099 m = adjust_address (op1, V2SFmode, 8);
9100 emit_insn (gen_sse_loadhps (op0, op0, m));
9103 else if (MEM_P (op0))
9105 /* If we're optimizing for size, movups is the smallest. */
9108 op0 = gen_lowpart (V4SFmode, op0);
9109 op1 = gen_lowpart (V4SFmode, op1);
9110 emit_insn (gen_sse_movups (op0, op1));
9114 /* ??? Similar to above, only less clear because of quote
9115 typeless stores unquote. */
9116 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9117 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9119 op0 = gen_lowpart (V16QImode, op0);
9120 op1 = gen_lowpart (V16QImode, op1);
9121 emit_insn (gen_sse2_movdqu (op0, op1));
9125 if (TARGET_SSE2 && mode == V2DFmode)
9127 m = adjust_address (op0, DFmode, 0);
9128 emit_insn (gen_sse2_storelpd (m, op1));
9129 m = adjust_address (op0, DFmode, 8);
9130 emit_insn (gen_sse2_storehpd (m, op1));
9134 if (mode != V4SFmode)
9135 op1 = gen_lowpart (V4SFmode, op1);
9136 m = adjust_address (op0, V2SFmode, 0);
9137 emit_insn (gen_sse_storelps (m, op1));
9138 m = adjust_address (op0, V2SFmode, 8);
9139 emit_insn (gen_sse_storehps (m, op1));
9146 /* Expand a push in MODE. This is some mode for which we do not support
9147 proper push instructions, at least from the registers that we expect
9148 the value to live in. */
9151 ix86_expand_push (enum machine_mode mode, rtx x)
9155 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9156 GEN_INT (-GET_MODE_SIZE (mode)),
9157 stack_pointer_rtx, 1, OPTAB_DIRECT);
9158 if (tmp != stack_pointer_rtx)
9159 emit_move_insn (stack_pointer_rtx, tmp);
9161 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9162 emit_move_insn (tmp, x);
9165 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9166 destination to use for the operation. If different from the true
9167 destination in operands[0], a copy operation will be required. */
9170 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9173 int matching_memory;
9174 rtx src1, src2, dst;
9180 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9181 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9182 && (rtx_equal_p (dst, src2)
9183 || immediate_operand (src1, mode)))
9190 /* If the destination is memory, and we do not have matching source
9191 operands, do things in registers. */
9192 matching_memory = 0;
9193 if (GET_CODE (dst) == MEM)
9195 if (rtx_equal_p (dst, src1))
9196 matching_memory = 1;
9197 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9198 && rtx_equal_p (dst, src2))
9199 matching_memory = 2;
9201 dst = gen_reg_rtx (mode);
9204 /* Both source operands cannot be in memory. */
9205 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9207 if (matching_memory != 2)
9208 src2 = force_reg (mode, src2);
9210 src1 = force_reg (mode, src1);
9213 /* If the operation is not commutable, source 1 cannot be a constant
9214 or non-matching memory. */
9215 if ((CONSTANT_P (src1)
9216 || (!matching_memory && GET_CODE (src1) == MEM))
9217 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9218 src1 = force_reg (mode, src1);
9220 src1 = operands[1] = src1;
9221 src2 = operands[2] = src2;
9225 /* Similarly, but assume that the destination has already been
9229 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9230 enum machine_mode mode, rtx operands[])
9232 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9233 gcc_assert (dst == operands[0]);
9236 /* Attempt to expand a binary operator. Make the expansion closer to the
9237 actual machine, then just general_operand, which will allow 3 separate
9238 memory references (one output, two input) in a single insn. */
9241 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9244 rtx src1, src2, dst, op, clob;
9246 dst = ix86_fixup_binary_operands (code, mode, operands);
9250 /* Emit the instruction. */
9252 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9253 if (reload_in_progress)
9255 /* Reload doesn't know about the flags register, and doesn't know that
9256 it doesn't want to clobber it. We can only do this with PLUS. */
9257 gcc_assert (code == PLUS);
9262 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9263 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9266 /* Fix up the destination if needed. */
9267 if (dst != operands[0])
9268 emit_move_insn (operands[0], dst);
9271 /* Return TRUE or FALSE depending on whether the binary operator meets the
9272 appropriate constraints. */
9275 ix86_binary_operator_ok (enum rtx_code code,
9276 enum machine_mode mode ATTRIBUTE_UNUSED,
9279 /* Both source operands cannot be in memory. */
9280 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9282 /* If the operation is not commutable, source 1 cannot be a constant. */
9283 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9285 /* If the destination is memory, we must have a matching source operand. */
9286 if (GET_CODE (operands[0]) == MEM
9287 && ! (rtx_equal_p (operands[0], operands[1])
9288 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9289 && rtx_equal_p (operands[0], operands[2]))))
9291 /* If the operation is not commutable and the source 1 is memory, we must
9292 have a matching destination. */
9293 if (GET_CODE (operands[1]) == MEM
9294 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9295 && ! rtx_equal_p (operands[0], operands[1]))
9300 /* Attempt to expand a unary operator. Make the expansion closer to the
9301 actual machine, then just general_operand, which will allow 2 separate
9302 memory references (one output, one input) in a single insn. */
9305 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9308 int matching_memory;
9309 rtx src, dst, op, clob;
9314 /* If the destination is memory, and we do not have matching source
9315 operands, do things in registers. */
9316 matching_memory = 0;
9319 if (rtx_equal_p (dst, src))
9320 matching_memory = 1;
9322 dst = gen_reg_rtx (mode);
9325 /* When source operand is memory, destination must match. */
9326 if (MEM_P (src) && !matching_memory)
9327 src = force_reg (mode, src);
9329 /* Emit the instruction. */
9331 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9332 if (reload_in_progress || code == NOT)
9334 /* Reload doesn't know about the flags register, and doesn't know that
9335 it doesn't want to clobber it. */
9336 gcc_assert (code == NOT);
9341 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9342 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9345 /* Fix up the destination if needed. */
9346 if (dst != operands[0])
9347 emit_move_insn (operands[0], dst);
9350 /* Return TRUE or FALSE depending on whether the unary operator meets the
9351 appropriate constraints. */
9354 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9355 enum machine_mode mode ATTRIBUTE_UNUSED,
9356 rtx operands[2] ATTRIBUTE_UNUSED)
9358 /* If one of operands is memory, source and destination must match. */
9359 if ((GET_CODE (operands[0]) == MEM
9360 || GET_CODE (operands[1]) == MEM)
9361 && ! rtx_equal_p (operands[0], operands[1]))
9366 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9367 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9368 true, then replicate the mask for all elements of the vector register.
9369 If INVERT is true, then create a mask excluding the sign bit. */
9372 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9374 enum machine_mode vec_mode;
9375 HOST_WIDE_INT hi, lo;
9380 /* Find the sign bit, sign extended to 2*HWI. */
9382 lo = 0x80000000, hi = lo < 0;
9383 else if (HOST_BITS_PER_WIDE_INT >= 64)
9384 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9386 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9391 /* Force this value into the low part of a fp vector constant. */
9392 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9393 mask = gen_lowpart (mode, mask);
9398 v = gen_rtvec (4, mask, mask, mask, mask);
9400 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9401 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9402 vec_mode = V4SFmode;
9407 v = gen_rtvec (2, mask, mask);
9409 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9410 vec_mode = V2DFmode;
9413 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9416 /* Generate code for floating point ABS or NEG. */
9419 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9422 rtx mask, set, use, clob, dst, src;
9423 bool matching_memory;
9424 bool use_sse = false;
9425 bool vector_mode = VECTOR_MODE_P (mode);
9426 enum machine_mode elt_mode = mode;
9430 elt_mode = GET_MODE_INNER (mode);
9433 else if (TARGET_SSE_MATH)
9434 use_sse = SSE_FLOAT_MODE_P (mode);
9436 /* NEG and ABS performed with SSE use bitwise mask operations.
9437 Create the appropriate mask now. */
9439 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9446 /* If the destination is memory, and we don't have matching source
9447 operands or we're using the x87, do things in registers. */
9448 matching_memory = false;
9451 if (use_sse && rtx_equal_p (dst, src))
9452 matching_memory = true;
9454 dst = gen_reg_rtx (mode);
9456 if (MEM_P (src) && !matching_memory)
9457 src = force_reg (mode, src);
9461 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9462 set = gen_rtx_SET (VOIDmode, dst, set);
9467 set = gen_rtx_fmt_e (code, mode, src);
9468 set = gen_rtx_SET (VOIDmode, dst, set);
9471 use = gen_rtx_USE (VOIDmode, mask);
9472 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9473 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9474 gen_rtvec (3, set, use, clob)));
9480 if (dst != operands[0])
9481 emit_move_insn (operands[0], dst);
9484 /* Expand a copysign operation. Special case operand 0 being a constant. */
9487 ix86_expand_copysign (rtx operands[])
9489 enum machine_mode mode, vmode;
9490 rtx dest, op0, op1, mask, nmask;
9496 mode = GET_MODE (dest);
9497 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9499 if (GET_CODE (op0) == CONST_DOUBLE)
9503 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9504 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9506 if (op0 == CONST0_RTX (mode))
9507 op0 = CONST0_RTX (vmode);
9511 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9512 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9514 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9515 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9518 mask = ix86_build_signbit_mask (mode, 0, 0);
9521 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9523 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9527 nmask = ix86_build_signbit_mask (mode, 0, 1);
9528 mask = ix86_build_signbit_mask (mode, 0, 0);
9531 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9533 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9537 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9538 be a constant, and so has already been expanded into a vector constant. */
9541 ix86_split_copysign_const (rtx operands[])
9543 enum machine_mode mode, vmode;
9544 rtx dest, op0, op1, mask, x;
9551 mode = GET_MODE (dest);
9552 vmode = GET_MODE (mask);
9554 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9555 x = gen_rtx_AND (vmode, dest, mask);
9556 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9558 if (op0 != CONST0_RTX (vmode))
9560 x = gen_rtx_IOR (vmode, dest, op0);
9561 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9565 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9566 so we have to do two masks. */
9569 ix86_split_copysign_var (rtx operands[])
9571 enum machine_mode mode, vmode;
9572 rtx dest, scratch, op0, op1, mask, nmask, x;
9575 scratch = operands[1];
9578 nmask = operands[4];
9581 mode = GET_MODE (dest);
9582 vmode = GET_MODE (mask);
9584 if (rtx_equal_p (op0, op1))
9586 /* Shouldn't happen often (it's useless, obviously), but when it does
9587 we'd generate incorrect code if we continue below. */
9588 emit_move_insn (dest, op0);
9592 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9594 gcc_assert (REGNO (op1) == REGNO (scratch));
9596 x = gen_rtx_AND (vmode, scratch, mask);
9597 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9600 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9601 x = gen_rtx_NOT (vmode, dest);
9602 x = gen_rtx_AND (vmode, x, op0);
9603 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9607 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9609 x = gen_rtx_AND (vmode, scratch, mask);
9611 else /* alternative 2,4 */
9613 gcc_assert (REGNO (mask) == REGNO (scratch));
9614 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9615 x = gen_rtx_AND (vmode, scratch, op1);
9617 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9619 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9621 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9622 x = gen_rtx_AND (vmode, dest, nmask);
9624 else /* alternative 3,4 */
9626 gcc_assert (REGNO (nmask) == REGNO (dest));
9628 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9629 x = gen_rtx_AND (vmode, dest, op0);
9631 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9634 x = gen_rtx_IOR (vmode, dest, scratch);
9635 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9638 /* Return TRUE or FALSE depending on whether the first SET in INSN
9639 has source and destination with matching CC modes, and that the
9640 CC mode is at least as constrained as REQ_MODE. */
9643 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9646 enum machine_mode set_mode;
9648 set = PATTERN (insn);
9649 if (GET_CODE (set) == PARALLEL)
9650 set = XVECEXP (set, 0, 0);
9651 gcc_assert (GET_CODE (set) == SET);
9652 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9654 set_mode = GET_MODE (SET_DEST (set));
9658 if (req_mode != CCNOmode
9659 && (req_mode != CCmode
9660 || XEXP (SET_SRC (set), 1) != const0_rtx))
9664 if (req_mode == CCGCmode)
9668 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9672 if (req_mode == CCZmode)
9682 return (GET_MODE (SET_SRC (set)) == set_mode);
9685 /* Generate insn patterns to do an integer compare of OPERANDS. */
9688 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9690 enum machine_mode cmpmode;
9693 cmpmode = SELECT_CC_MODE (code, op0, op1);
9694 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9696 /* This is very simple, but making the interface the same as in the
9697 FP case makes the rest of the code easier. */
9698 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9699 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9701 /* Return the test that should be put into the flags user, i.e.
9702 the bcc, scc, or cmov instruction. */
9703 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9706 /* Figure out whether to use ordered or unordered fp comparisons.
9707 Return the appropriate mode to use. */
9710 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9712 /* ??? In order to make all comparisons reversible, we do all comparisons
9713 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9714 all forms trapping and nontrapping comparisons, we can make inequality
9715 comparisons trapping again, since it results in better code when using
9716 FCOM based compares. */
9717 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9721 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9723 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9724 return ix86_fp_compare_mode (code);
9727 /* Only zero flag is needed. */
9729 case NE: /* ZF!=0 */
9731 /* Codes needing carry flag. */
9732 case GEU: /* CF=0 */
9733 case GTU: /* CF=0 & ZF=0 */
9734 case LTU: /* CF=1 */
9735 case LEU: /* CF=1 | ZF=1 */
9737 /* Codes possibly doable only with sign flag when
9738 comparing against zero. */
9739 case GE: /* SF=OF or SF=0 */
9740 case LT: /* SF<>OF or SF=1 */
9741 if (op1 == const0_rtx)
9744 /* For other cases Carry flag is not required. */
9746 /* Codes doable only with sign flag when comparing
9747 against zero, but we miss jump instruction for it
9748 so we need to use relational tests against overflow
9749 that thus needs to be zero. */
9750 case GT: /* ZF=0 & SF=OF */
9751 case LE: /* ZF=1 | SF<>OF */
9752 if (op1 == const0_rtx)
9756 /* strcmp pattern do (use flags) and combine may ask us for proper
9765 /* Return the fixed registers used for condition codes. */
9768 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9775 /* If two condition code modes are compatible, return a condition code
9776 mode which is compatible with both. Otherwise, return
9779 static enum machine_mode
9780 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9785 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9788 if ((m1 == CCGCmode && m2 == CCGOCmode)
9789 || (m1 == CCGOCmode && m2 == CCGCmode))
9817 /* These are only compatible with themselves, which we already
9823 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9826 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9828 enum rtx_code swapped_code = swap_condition (code);
9829 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9830 || (ix86_fp_comparison_cost (swapped_code)
9831 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9834 /* Swap, force into registers, or otherwise massage the two operands
9835 to a fp comparison. The operands are updated in place; the new
9836 comparison code is returned. */
9838 static enum rtx_code
9839 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9841 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9842 rtx op0 = *pop0, op1 = *pop1;
9843 enum machine_mode op_mode = GET_MODE (op0);
9844 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9846 /* All of the unordered compare instructions only work on registers.
9847 The same is true of the fcomi compare instructions. The XFmode
9848 compare instructions require registers except when comparing
9849 against zero or when converting operand 1 from fixed point to
9853 && (fpcmp_mode == CCFPUmode
9854 || (op_mode == XFmode
9855 && ! (standard_80387_constant_p (op0) == 1
9856 || standard_80387_constant_p (op1) == 1)
9857 && GET_CODE (op1) != FLOAT)
9858 || ix86_use_fcomi_compare (code)))
9860 op0 = force_reg (op_mode, op0);
9861 op1 = force_reg (op_mode, op1);
9865 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9866 things around if they appear profitable, otherwise force op0
9869 if (standard_80387_constant_p (op0) == 0
9870 || (GET_CODE (op0) == MEM
9871 && ! (standard_80387_constant_p (op1) == 0
9872 || GET_CODE (op1) == MEM)))
9875 tmp = op0, op0 = op1, op1 = tmp;
9876 code = swap_condition (code);
9879 if (GET_CODE (op0) != REG)
9880 op0 = force_reg (op_mode, op0);
9882 if (CONSTANT_P (op1))
9884 int tmp = standard_80387_constant_p (op1);
9886 op1 = validize_mem (force_const_mem (op_mode, op1));
9890 op1 = force_reg (op_mode, op1);
9893 op1 = force_reg (op_mode, op1);
9897 /* Try to rearrange the comparison to make it cheaper. */
9898 if (ix86_fp_comparison_cost (code)
9899 > ix86_fp_comparison_cost (swap_condition (code))
9900 && (GET_CODE (op1) == REG || !no_new_pseudos))
9903 tmp = op0, op0 = op1, op1 = tmp;
9904 code = swap_condition (code);
9905 if (GET_CODE (op0) != REG)
9906 op0 = force_reg (op_mode, op0);
9914 /* Convert comparison codes we use to represent FP comparison to integer
9915 code that will result in proper branch. Return UNKNOWN if no such code
9919 ix86_fp_compare_code_to_integer (enum rtx_code code)
9948 /* Split comparison code CODE into comparisons we can do using branch
9949 instructions. BYPASS_CODE is comparison code for branch that will
9950 branch around FIRST_CODE and SECOND_CODE. If some of branches
9951 is not required, set value to UNKNOWN.
9952 We never require more than two branches. */
9955 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9956 enum rtx_code *first_code,
9957 enum rtx_code *second_code)
9960 *bypass_code = UNKNOWN;
9961 *second_code = UNKNOWN;
9963 /* The fcomi comparison sets flags as follows:
9973 case GT: /* GTU - CF=0 & ZF=0 */
9974 case GE: /* GEU - CF=0 */
9975 case ORDERED: /* PF=0 */
9976 case UNORDERED: /* PF=1 */
9977 case UNEQ: /* EQ - ZF=1 */
9978 case UNLT: /* LTU - CF=1 */
9979 case UNLE: /* LEU - CF=1 | ZF=1 */
9980 case LTGT: /* EQ - ZF=0 */
9982 case LT: /* LTU - CF=1 - fails on unordered */
9984 *bypass_code = UNORDERED;
9986 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9988 *bypass_code = UNORDERED;
9990 case EQ: /* EQ - ZF=1 - fails on unordered */
9992 *bypass_code = UNORDERED;
9994 case NE: /* NE - ZF=0 - fails on unordered */
9996 *second_code = UNORDERED;
9998 case UNGE: /* GEU - CF=0 - fails on unordered */
10000 *second_code = UNORDERED;
10002 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10004 *second_code = UNORDERED;
10007 gcc_unreachable ();
10009 if (!TARGET_IEEE_FP)
10011 *second_code = UNKNOWN;
10012 *bypass_code = UNKNOWN;
10016 /* Return cost of comparison done fcom + arithmetics operations on AX.
10017 All following functions do use number of instructions as a cost metrics.
10018 In future this should be tweaked to compute bytes for optimize_size and
10019 take into account performance of various instructions on various CPUs. */
10021 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10023 if (!TARGET_IEEE_FP)
10025 /* The cost of code output by ix86_expand_fp_compare. */
10049 gcc_unreachable ();
10053 /* Return cost of comparison done using fcomi operation.
10054 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10056 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10058 enum rtx_code bypass_code, first_code, second_code;
10059 /* Return arbitrarily high cost when instruction is not supported - this
10060 prevents gcc from using it. */
10063 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10064 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10067 /* Return cost of comparison done using sahf operation.
10068 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10070 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10072 enum rtx_code bypass_code, first_code, second_code;
10073 /* Return arbitrarily high cost when instruction is not preferred - this
10074 avoids gcc from using it. */
10075 if (!TARGET_USE_SAHF && !optimize_size)
10077 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10078 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10081 /* Compute cost of the comparison done using any method.
10082 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10084 ix86_fp_comparison_cost (enum rtx_code code)
10086 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10089 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10090 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10092 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10093 if (min > sahf_cost)
10095 if (min > fcomi_cost)
10100 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10103 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10104 rtx *second_test, rtx *bypass_test)
10106 enum machine_mode fpcmp_mode, intcmp_mode;
10108 int cost = ix86_fp_comparison_cost (code);
10109 enum rtx_code bypass_code, first_code, second_code;
10111 fpcmp_mode = ix86_fp_compare_mode (code);
10112 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10115 *second_test = NULL_RTX;
10117 *bypass_test = NULL_RTX;
10119 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10121 /* Do fcomi/sahf based test when profitable. */
10122 if ((bypass_code == UNKNOWN || bypass_test)
10123 && (second_code == UNKNOWN || second_test)
10124 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10128 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10129 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10135 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10136 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10138 scratch = gen_reg_rtx (HImode);
10139 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10140 emit_insn (gen_x86_sahf_1 (scratch));
10143 /* The FP codes work out to act like unsigned. */
10144 intcmp_mode = fpcmp_mode;
10146 if (bypass_code != UNKNOWN)
10147 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10148 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10150 if (second_code != UNKNOWN)
10151 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10152 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10157 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10158 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10159 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10161 scratch = gen_reg_rtx (HImode);
10162 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10164 /* In the unordered case, we have to check C2 for NaN's, which
10165 doesn't happen to work out to anything nice combination-wise.
10166 So do some bit twiddling on the value we've got in AH to come
10167 up with an appropriate set of condition codes. */
10169 intcmp_mode = CCNOmode;
10174 if (code == GT || !TARGET_IEEE_FP)
10176 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10181 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10182 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10183 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10184 intcmp_mode = CCmode;
10190 if (code == LT && TARGET_IEEE_FP)
10192 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10193 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10194 intcmp_mode = CCmode;
10199 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10205 if (code == GE || !TARGET_IEEE_FP)
10207 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10212 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10213 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10220 if (code == LE && TARGET_IEEE_FP)
10222 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10223 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10224 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10225 intcmp_mode = CCmode;
10230 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10236 if (code == EQ && TARGET_IEEE_FP)
10238 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10239 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10240 intcmp_mode = CCmode;
10245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10252 if (code == NE && TARGET_IEEE_FP)
10254 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10255 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10261 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10267 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10276 gcc_unreachable ();
10280 /* Return the test that should be put into the flags user, i.e.
10281 the bcc, scc, or cmov instruction. */
10282 return gen_rtx_fmt_ee (code, VOIDmode,
10283 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10288 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10291 op0 = ix86_compare_op0;
10292 op1 = ix86_compare_op1;
10295 *second_test = NULL_RTX;
10297 *bypass_test = NULL_RTX;
10299 if (ix86_compare_emitted)
10301 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10302 ix86_compare_emitted = NULL_RTX;
10304 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10305 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10306 second_test, bypass_test);
10308 ret = ix86_expand_int_compare (code, op0, op1);
10313 /* Return true if the CODE will result in nontrivial jump sequence. */
10315 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10317 enum rtx_code bypass_code, first_code, second_code;
10320 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10321 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10325 ix86_expand_branch (enum rtx_code code, rtx label)
10329 /* If we have emitted a compare insn, go straight to simple.
10330 ix86_expand_compare won't emit anything if ix86_compare_emitted
10332 if (ix86_compare_emitted)
10335 switch (GET_MODE (ix86_compare_op0))
10341 tmp = ix86_expand_compare (code, NULL, NULL);
10342 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10343 gen_rtx_LABEL_REF (VOIDmode, label),
10345 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10354 enum rtx_code bypass_code, first_code, second_code;
10356 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10357 &ix86_compare_op1);
10359 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10361 /* Check whether we will use the natural sequence with one jump. If
10362 so, we can expand jump early. Otherwise delay expansion by
10363 creating compound insn to not confuse optimizers. */
10364 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10367 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10368 gen_rtx_LABEL_REF (VOIDmode, label),
10369 pc_rtx, NULL_RTX, NULL_RTX);
10373 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10374 ix86_compare_op0, ix86_compare_op1);
10375 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10376 gen_rtx_LABEL_REF (VOIDmode, label),
10378 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10380 use_fcomi = ix86_use_fcomi_compare (code);
10381 vec = rtvec_alloc (3 + !use_fcomi);
10382 RTVEC_ELT (vec, 0) = tmp;
10384 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10386 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10389 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10391 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10400 /* Expand DImode branch into multiple compare+branch. */
10402 rtx lo[2], hi[2], label2;
10403 enum rtx_code code1, code2, code3;
10404 enum machine_mode submode;
10406 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10408 tmp = ix86_compare_op0;
10409 ix86_compare_op0 = ix86_compare_op1;
10410 ix86_compare_op1 = tmp;
10411 code = swap_condition (code);
10413 if (GET_MODE (ix86_compare_op0) == DImode)
10415 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10416 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10421 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10422 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10426 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10427 avoid two branches. This costs one extra insn, so disable when
10428 optimizing for size. */
10430 if ((code == EQ || code == NE)
10432 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10437 if (hi[1] != const0_rtx)
10438 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10439 NULL_RTX, 0, OPTAB_WIDEN);
10442 if (lo[1] != const0_rtx)
10443 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10444 NULL_RTX, 0, OPTAB_WIDEN);
10446 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10447 NULL_RTX, 0, OPTAB_WIDEN);
10449 ix86_compare_op0 = tmp;
10450 ix86_compare_op1 = const0_rtx;
10451 ix86_expand_branch (code, label);
10455 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10456 op1 is a constant and the low word is zero, then we can just
10457 examine the high word. */
10459 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10462 case LT: case LTU: case GE: case GEU:
10463 ix86_compare_op0 = hi[0];
10464 ix86_compare_op1 = hi[1];
10465 ix86_expand_branch (code, label);
10471 /* Otherwise, we need two or three jumps. */
10473 label2 = gen_label_rtx ();
10476 code2 = swap_condition (code);
10477 code3 = unsigned_condition (code);
10481 case LT: case GT: case LTU: case GTU:
10484 case LE: code1 = LT; code2 = GT; break;
10485 case GE: code1 = GT; code2 = LT; break;
10486 case LEU: code1 = LTU; code2 = GTU; break;
10487 case GEU: code1 = GTU; code2 = LTU; break;
10489 case EQ: code1 = UNKNOWN; code2 = NE; break;
10490 case NE: code2 = UNKNOWN; break;
10493 gcc_unreachable ();
10498 * if (hi(a) < hi(b)) goto true;
10499 * if (hi(a) > hi(b)) goto false;
10500 * if (lo(a) < lo(b)) goto true;
10504 ix86_compare_op0 = hi[0];
10505 ix86_compare_op1 = hi[1];
10507 if (code1 != UNKNOWN)
10508 ix86_expand_branch (code1, label);
10509 if (code2 != UNKNOWN)
10510 ix86_expand_branch (code2, label2);
10512 ix86_compare_op0 = lo[0];
10513 ix86_compare_op1 = lo[1];
10514 ix86_expand_branch (code3, label);
10516 if (code2 != UNKNOWN)
10517 emit_label (label2);
10522 gcc_unreachable ();
10526 /* Split branch based on floating point condition. */
10528 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10529 rtx target1, rtx target2, rtx tmp, rtx pushed)
10531 rtx second, bypass;
10532 rtx label = NULL_RTX;
10534 int bypass_probability = -1, second_probability = -1, probability = -1;
10537 if (target2 != pc_rtx)
10540 code = reverse_condition_maybe_unordered (code);
10545 condition = ix86_expand_fp_compare (code, op1, op2,
10546 tmp, &second, &bypass);
10548 /* Remove pushed operand from stack. */
10550 ix86_free_from_memory (GET_MODE (pushed));
10552 if (split_branch_probability >= 0)
10554 /* Distribute the probabilities across the jumps.
10555 Assume the BYPASS and SECOND to be always test
10557 probability = split_branch_probability;
10559 /* Value of 1 is low enough to make no need for probability
10560 to be updated. Later we may run some experiments and see
10561 if unordered values are more frequent in practice. */
10563 bypass_probability = 1;
10565 second_probability = 1;
10567 if (bypass != NULL_RTX)
10569 label = gen_label_rtx ();
10570 i = emit_jump_insn (gen_rtx_SET
10572 gen_rtx_IF_THEN_ELSE (VOIDmode,
10574 gen_rtx_LABEL_REF (VOIDmode,
10577 if (bypass_probability >= 0)
10579 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10580 GEN_INT (bypass_probability),
10583 i = emit_jump_insn (gen_rtx_SET
10585 gen_rtx_IF_THEN_ELSE (VOIDmode,
10586 condition, target1, target2)));
10587 if (probability >= 0)
10589 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10590 GEN_INT (probability),
10592 if (second != NULL_RTX)
10594 i = emit_jump_insn (gen_rtx_SET
10596 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10598 if (second_probability >= 0)
10600 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10601 GEN_INT (second_probability),
10604 if (label != NULL_RTX)
10605 emit_label (label);
10609 ix86_expand_setcc (enum rtx_code code, rtx dest)
10611 rtx ret, tmp, tmpreg, equiv;
10612 rtx second_test, bypass_test;
10614 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10615 return 0; /* FAIL */
10617 gcc_assert (GET_MODE (dest) == QImode);
10619 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10620 PUT_MODE (ret, QImode);
10625 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10626 if (bypass_test || second_test)
10628 rtx test = second_test;
10630 rtx tmp2 = gen_reg_rtx (QImode);
10633 gcc_assert (!second_test);
10634 test = bypass_test;
10636 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10638 PUT_MODE (test, QImode);
10639 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10642 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10644 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10647 /* Attach a REG_EQUAL note describing the comparison result. */
10648 if (ix86_compare_op0 && ix86_compare_op1)
10650 equiv = simplify_gen_relational (code, QImode,
10651 GET_MODE (ix86_compare_op0),
10652 ix86_compare_op0, ix86_compare_op1);
10653 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10656 return 1; /* DONE */
10659 /* Expand comparison setting or clearing carry flag. Return true when
10660 successful and set pop for the operation. */
10662 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10664 enum machine_mode mode =
10665 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10667 /* Do not handle DImode compares that go through special path. Also we can't
10668 deal with FP compares yet. This is possible to add. */
10669 if (mode == (TARGET_64BIT ? TImode : DImode))
10671 if (FLOAT_MODE_P (mode))
10673 rtx second_test = NULL, bypass_test = NULL;
10674 rtx compare_op, compare_seq;
10676 /* Shortcut: following common codes never translate into carry flag compares. */
10677 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10678 || code == ORDERED || code == UNORDERED)
10681 /* These comparisons require zero flag; swap operands so they won't. */
10682 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10683 && !TARGET_IEEE_FP)
10688 code = swap_condition (code);
10691 /* Try to expand the comparison and verify that we end up with carry flag
10692 based comparison. This is fails to be true only when we decide to expand
10693 comparison using arithmetic that is not too common scenario. */
10695 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10696 &second_test, &bypass_test);
10697 compare_seq = get_insns ();
10700 if (second_test || bypass_test)
10702 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10703 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10704 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10706 code = GET_CODE (compare_op);
10707 if (code != LTU && code != GEU)
10709 emit_insn (compare_seq);
10713 if (!INTEGRAL_MODE_P (mode))
10721 /* Convert a==0 into (unsigned)a<1. */
10724 if (op1 != const0_rtx)
10727 code = (code == EQ ? LTU : GEU);
10730 /* Convert a>b into b<a or a>=b-1. */
10733 if (GET_CODE (op1) == CONST_INT)
10735 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10736 /* Bail out on overflow. We still can swap operands but that
10737 would force loading of the constant into register. */
10738 if (op1 == const0_rtx
10739 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10741 code = (code == GTU ? GEU : LTU);
10748 code = (code == GTU ? LTU : GEU);
10752 /* Convert a>=0 into (unsigned)a<0x80000000. */
10755 if (mode == DImode || op1 != const0_rtx)
10757 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10758 code = (code == LT ? GEU : LTU);
10762 if (mode == DImode || op1 != constm1_rtx)
10764 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10765 code = (code == LE ? GEU : LTU);
10771 /* Swapping operands may cause constant to appear as first operand. */
10772 if (!nonimmediate_operand (op0, VOIDmode))
10774 if (no_new_pseudos)
10776 op0 = force_reg (mode, op0);
10778 ix86_compare_op0 = op0;
10779 ix86_compare_op1 = op1;
10780 *pop = ix86_expand_compare (code, NULL, NULL);
10781 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10786 ix86_expand_int_movcc (rtx operands[])
10788 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10789 rtx compare_seq, compare_op;
10790 rtx second_test, bypass_test;
10791 enum machine_mode mode = GET_MODE (operands[0]);
10792 bool sign_bit_compare_p = false;;
10795 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10796 compare_seq = get_insns ();
10799 compare_code = GET_CODE (compare_op);
10801 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10802 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10803 sign_bit_compare_p = true;
10805 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10806 HImode insns, we'd be swallowed in word prefix ops. */
10808 if ((mode != HImode || TARGET_FAST_PREFIX)
10809 && (mode != (TARGET_64BIT ? TImode : DImode))
10810 && GET_CODE (operands[2]) == CONST_INT
10811 && GET_CODE (operands[3]) == CONST_INT)
10813 rtx out = operands[0];
10814 HOST_WIDE_INT ct = INTVAL (operands[2]);
10815 HOST_WIDE_INT cf = INTVAL (operands[3]);
10816 HOST_WIDE_INT diff;
10819 /* Sign bit compares are better done using shifts than we do by using
10821 if (sign_bit_compare_p
10822 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10823 ix86_compare_op1, &compare_op))
10825 /* Detect overlap between destination and compare sources. */
10828 if (!sign_bit_compare_p)
10830 bool fpcmp = false;
10832 compare_code = GET_CODE (compare_op);
10834 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10835 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10838 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10841 /* To simplify rest of code, restrict to the GEU case. */
10842 if (compare_code == LTU)
10844 HOST_WIDE_INT tmp = ct;
10847 compare_code = reverse_condition (compare_code);
10848 code = reverse_condition (code);
10853 PUT_CODE (compare_op,
10854 reverse_condition_maybe_unordered
10855 (GET_CODE (compare_op)));
10857 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10861 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10862 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10863 tmp = gen_reg_rtx (mode);
10865 if (mode == DImode)
10866 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10868 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10872 if (code == GT || code == GE)
10873 code = reverse_condition (code);
10876 HOST_WIDE_INT tmp = ct;
10881 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10882 ix86_compare_op1, VOIDmode, 0, -1);
10895 tmp = expand_simple_binop (mode, PLUS,
10897 copy_rtx (tmp), 1, OPTAB_DIRECT);
10908 tmp = expand_simple_binop (mode, IOR,
10910 copy_rtx (tmp), 1, OPTAB_DIRECT);
10912 else if (diff == -1 && ct)
10922 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10924 tmp = expand_simple_binop (mode, PLUS,
10925 copy_rtx (tmp), GEN_INT (cf),
10926 copy_rtx (tmp), 1, OPTAB_DIRECT);
10934 * andl cf - ct, dest
10944 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10947 tmp = expand_simple_binop (mode, AND,
10949 gen_int_mode (cf - ct, mode),
10950 copy_rtx (tmp), 1, OPTAB_DIRECT);
10952 tmp = expand_simple_binop (mode, PLUS,
10953 copy_rtx (tmp), GEN_INT (ct),
10954 copy_rtx (tmp), 1, OPTAB_DIRECT);
10957 if (!rtx_equal_p (tmp, out))
10958 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10960 return 1; /* DONE */
10966 tmp = ct, ct = cf, cf = tmp;
10968 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10970 /* We may be reversing unordered compare to normal compare, that
10971 is not valid in general (we may convert non-trapping condition
10972 to trapping one), however on i386 we currently emit all
10973 comparisons unordered. */
10974 compare_code = reverse_condition_maybe_unordered (compare_code);
10975 code = reverse_condition_maybe_unordered (code);
10979 compare_code = reverse_condition (compare_code);
10980 code = reverse_condition (code);
10984 compare_code = UNKNOWN;
10985 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10986 && GET_CODE (ix86_compare_op1) == CONST_INT)
10988 if (ix86_compare_op1 == const0_rtx
10989 && (code == LT || code == GE))
10990 compare_code = code;
10991 else if (ix86_compare_op1 == constm1_rtx)
10995 else if (code == GT)
11000 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11001 if (compare_code != UNKNOWN
11002 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11003 && (cf == -1 || ct == -1))
11005 /* If lea code below could be used, only optimize
11006 if it results in a 2 insn sequence. */
11008 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11009 || diff == 3 || diff == 5 || diff == 9)
11010 || (compare_code == LT && ct == -1)
11011 || (compare_code == GE && cf == -1))
11014 * notl op1 (if necessary)
11022 code = reverse_condition (code);
11025 out = emit_store_flag (out, code, ix86_compare_op0,
11026 ix86_compare_op1, VOIDmode, 0, -1);
11028 out = expand_simple_binop (mode, IOR,
11030 out, 1, OPTAB_DIRECT);
11031 if (out != operands[0])
11032 emit_move_insn (operands[0], out);
11034 return 1; /* DONE */
11039 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11040 || diff == 3 || diff == 5 || diff == 9)
11041 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11043 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11049 * lea cf(dest*(ct-cf)),dest
11053 * This also catches the degenerate setcc-only case.
11059 out = emit_store_flag (out, code, ix86_compare_op0,
11060 ix86_compare_op1, VOIDmode, 0, 1);
11063 /* On x86_64 the lea instruction operates on Pmode, so we need
11064 to get arithmetics done in proper mode to match. */
11066 tmp = copy_rtx (out);
11070 out1 = copy_rtx (out);
11071 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11075 tmp = gen_rtx_PLUS (mode, tmp, out1);
11081 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11084 if (!rtx_equal_p (tmp, out))
11087 out = force_operand (tmp, copy_rtx (out));
11089 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11091 if (!rtx_equal_p (out, operands[0]))
11092 emit_move_insn (operands[0], copy_rtx (out));
11094 return 1; /* DONE */
11098 * General case: Jumpful:
11099 * xorl dest,dest cmpl op1, op2
11100 * cmpl op1, op2 movl ct, dest
11101 * setcc dest jcc 1f
11102 * decl dest movl cf, dest
11103 * andl (cf-ct),dest 1:
11106 * Size 20. Size 14.
11108 * This is reasonably steep, but branch mispredict costs are
11109 * high on modern cpus, so consider failing only if optimizing
11113 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11114 && BRANCH_COST >= 2)
11120 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11121 /* We may be reversing unordered compare to normal compare,
11122 that is not valid in general (we may convert non-trapping
11123 condition to trapping one), however on i386 we currently
11124 emit all comparisons unordered. */
11125 code = reverse_condition_maybe_unordered (code);
11128 code = reverse_condition (code);
11129 if (compare_code != UNKNOWN)
11130 compare_code = reverse_condition (compare_code);
11134 if (compare_code != UNKNOWN)
11136 /* notl op1 (if needed)
11141 For x < 0 (resp. x <= -1) there will be no notl,
11142 so if possible swap the constants to get rid of the
11144 True/false will be -1/0 while code below (store flag
11145 followed by decrement) is 0/-1, so the constants need
11146 to be exchanged once more. */
11148 if (compare_code == GE || !cf)
11150 code = reverse_condition (code);
11155 HOST_WIDE_INT tmp = cf;
11160 out = emit_store_flag (out, code, ix86_compare_op0,
11161 ix86_compare_op1, VOIDmode, 0, -1);
11165 out = emit_store_flag (out, code, ix86_compare_op0,
11166 ix86_compare_op1, VOIDmode, 0, 1);
11168 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11169 copy_rtx (out), 1, OPTAB_DIRECT);
11172 out = expand_simple_binop (mode, AND, copy_rtx (out),
11173 gen_int_mode (cf - ct, mode),
11174 copy_rtx (out), 1, OPTAB_DIRECT);
11176 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11177 copy_rtx (out), 1, OPTAB_DIRECT);
11178 if (!rtx_equal_p (out, operands[0]))
11179 emit_move_insn (operands[0], copy_rtx (out));
11181 return 1; /* DONE */
11185 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11187 /* Try a few things more with specific constants and a variable. */
11190 rtx var, orig_out, out, tmp;
11192 if (BRANCH_COST <= 2)
11193 return 0; /* FAIL */
11195 /* If one of the two operands is an interesting constant, load a
11196 constant with the above and mask it in with a logical operation. */
11198 if (GET_CODE (operands[2]) == CONST_INT)
11201 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11202 operands[3] = constm1_rtx, op = and_optab;
11203 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11204 operands[3] = const0_rtx, op = ior_optab;
11206 return 0; /* FAIL */
11208 else if (GET_CODE (operands[3]) == CONST_INT)
11211 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11212 operands[2] = constm1_rtx, op = and_optab;
11213 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11214 operands[2] = const0_rtx, op = ior_optab;
11216 return 0; /* FAIL */
11219 return 0; /* FAIL */
11221 orig_out = operands[0];
11222 tmp = gen_reg_rtx (mode);
11225 /* Recurse to get the constant loaded. */
11226 if (ix86_expand_int_movcc (operands) == 0)
11227 return 0; /* FAIL */
11229 /* Mask in the interesting variable. */
11230 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11232 if (!rtx_equal_p (out, orig_out))
11233 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11235 return 1; /* DONE */
11239 * For comparison with above,
11249 if (! nonimmediate_operand (operands[2], mode))
11250 operands[2] = force_reg (mode, operands[2]);
11251 if (! nonimmediate_operand (operands[3], mode))
11252 operands[3] = force_reg (mode, operands[3]);
11254 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11256 rtx tmp = gen_reg_rtx (mode);
11257 emit_move_insn (tmp, operands[3]);
11260 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11262 rtx tmp = gen_reg_rtx (mode);
11263 emit_move_insn (tmp, operands[2]);
11267 if (! register_operand (operands[2], VOIDmode)
11269 || ! register_operand (operands[3], VOIDmode)))
11270 operands[2] = force_reg (mode, operands[2]);
11273 && ! register_operand (operands[3], VOIDmode))
11274 operands[3] = force_reg (mode, operands[3]);
11276 emit_insn (compare_seq);
11277 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11278 gen_rtx_IF_THEN_ELSE (mode,
11279 compare_op, operands[2],
11282 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11283 gen_rtx_IF_THEN_ELSE (mode,
11285 copy_rtx (operands[3]),
11286 copy_rtx (operands[0]))));
11288 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11289 gen_rtx_IF_THEN_ELSE (mode,
11291 copy_rtx (operands[2]),
11292 copy_rtx (operands[0]))));
11294 return 1; /* DONE */
11297 /* Swap, force into registers, or otherwise massage the two operands
11298 to an sse comparison with a mask result. Thus we differ a bit from
11299 ix86_prepare_fp_compare_args which expects to produce a flags result.
11301 The DEST operand exists to help determine whether to commute commutative
11302 operators. The POP0/POP1 operands are updated in place. The new
11303 comparison code is returned, or UNKNOWN if not implementable. */
11305 static enum rtx_code
11306 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11307 rtx *pop0, rtx *pop1)
11315 /* We have no LTGT as an operator. We could implement it with
11316 NE & ORDERED, but this requires an extra temporary. It's
11317 not clear that it's worth it. */
11324 /* These are supported directly. */
11331 /* For commutative operators, try to canonicalize the destination
11332 operand to be first in the comparison - this helps reload to
11333 avoid extra moves. */
11334 if (!dest || !rtx_equal_p (dest, *pop1))
11342 /* These are not supported directly. Swap the comparison operands
11343 to transform into something that is supported. */
11347 code = swap_condition (code);
11351 gcc_unreachable ();
11357 /* Detect conditional moves that exactly match min/max operational
11358 semantics. Note that this is IEEE safe, as long as we don't
11359 interchange the operands.
11361 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11362 and TRUE if the operation is successful and instructions are emitted. */
11365 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11366 rtx cmp_op1, rtx if_true, rtx if_false)
11368 enum machine_mode mode;
11374 else if (code == UNGE)
11377 if_true = if_false;
11383 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11385 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11390 mode = GET_MODE (dest);
11392 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11393 but MODE may be a vector mode and thus not appropriate. */
11394 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11396 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11399 if_true = force_reg (mode, if_true);
11400 v = gen_rtvec (2, if_true, if_false);
11401 tmp = gen_rtx_UNSPEC (mode, v, u);
11405 code = is_min ? SMIN : SMAX;
11406 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11409 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11413 /* Expand an sse vector comparison. Return the register with the result. */
11416 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11417 rtx op_true, rtx op_false)
11419 enum machine_mode mode = GET_MODE (dest);
11422 cmp_op0 = force_reg (mode, cmp_op0);
11423 if (!nonimmediate_operand (cmp_op1, mode))
11424 cmp_op1 = force_reg (mode, cmp_op1);
11427 || reg_overlap_mentioned_p (dest, op_true)
11428 || reg_overlap_mentioned_p (dest, op_false))
11429 dest = gen_reg_rtx (mode);
11431 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11432 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11437 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11438 operations. This is used for both scalar and vector conditional moves. */
11441 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11443 enum machine_mode mode = GET_MODE (dest);
11446 if (op_false == CONST0_RTX (mode))
11448 op_true = force_reg (mode, op_true);
11449 x = gen_rtx_AND (mode, cmp, op_true);
11450 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11452 else if (op_true == CONST0_RTX (mode))
11454 op_false = force_reg (mode, op_false);
11455 x = gen_rtx_NOT (mode, cmp);
11456 x = gen_rtx_AND (mode, x, op_false);
11457 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11461 op_true = force_reg (mode, op_true);
11462 op_false = force_reg (mode, op_false);
11464 t2 = gen_reg_rtx (mode);
11466 t3 = gen_reg_rtx (mode);
11470 x = gen_rtx_AND (mode, op_true, cmp);
11471 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11473 x = gen_rtx_NOT (mode, cmp);
11474 x = gen_rtx_AND (mode, x, op_false);
11475 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11477 x = gen_rtx_IOR (mode, t3, t2);
11478 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11482 /* Expand a floating-point conditional move. Return true if successful. */
11485 ix86_expand_fp_movcc (rtx operands[])
11487 enum machine_mode mode = GET_MODE (operands[0]);
11488 enum rtx_code code = GET_CODE (operands[1]);
11489 rtx tmp, compare_op, second_test, bypass_test;
11491 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11493 enum machine_mode cmode;
11495 /* Since we've no cmove for sse registers, don't force bad register
11496 allocation just to gain access to it. Deny movcc when the
11497 comparison mode doesn't match the move mode. */
11498 cmode = GET_MODE (ix86_compare_op0);
11499 if (cmode == VOIDmode)
11500 cmode = GET_MODE (ix86_compare_op1);
11504 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11506 &ix86_compare_op1);
11507 if (code == UNKNOWN)
11510 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11511 ix86_compare_op1, operands[2],
11515 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11516 ix86_compare_op1, operands[2], operands[3]);
11517 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11521 /* The floating point conditional move instructions don't directly
11522 support conditions resulting from a signed integer comparison. */
11524 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11526 /* The floating point conditional move instructions don't directly
11527 support signed integer comparisons. */
11529 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11531 gcc_assert (!second_test && !bypass_test);
11532 tmp = gen_reg_rtx (QImode);
11533 ix86_expand_setcc (code, tmp);
11535 ix86_compare_op0 = tmp;
11536 ix86_compare_op1 = const0_rtx;
11537 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11539 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11541 tmp = gen_reg_rtx (mode);
11542 emit_move_insn (tmp, operands[3]);
11545 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11547 tmp = gen_reg_rtx (mode);
11548 emit_move_insn (tmp, operands[2]);
11552 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11553 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11554 operands[2], operands[3])));
11556 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11557 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11558 operands[3], operands[0])));
11560 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11561 gen_rtx_IF_THEN_ELSE (mode, second_test,
11562 operands[2], operands[0])));
11567 /* Expand a floating-point vector conditional move; a vcond operation
11568 rather than a movcc operation. */
11571 ix86_expand_fp_vcond (rtx operands[])
11573 enum rtx_code code = GET_CODE (operands[3]);
11576 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11577 &operands[4], &operands[5]);
11578 if (code == UNKNOWN)
11581 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11582 operands[5], operands[1], operands[2]))
11585 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11586 operands[1], operands[2]);
11587 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11591 /* Expand a signed integral vector conditional move. */
11594 ix86_expand_int_vcond (rtx operands[])
11596 enum machine_mode mode = GET_MODE (operands[0]);
11597 enum rtx_code code = GET_CODE (operands[3]);
11598 bool negate = false;
11601 cop0 = operands[4];
11602 cop1 = operands[5];
11604 /* Canonicalize the comparison to EQ, GT, GTU. */
11615 code = reverse_condition (code);
11621 code = reverse_condition (code);
11627 code = swap_condition (code);
11628 x = cop0, cop0 = cop1, cop1 = x;
11632 gcc_unreachable ();
11635 /* Unsigned parallel compare is not supported by the hardware. Play some
11636 tricks to turn this into a signed comparison against 0. */
11639 cop0 = force_reg (mode, cop0);
11647 /* Perform a parallel modulo subtraction. */
11648 t1 = gen_reg_rtx (mode);
11649 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11651 /* Extract the original sign bit of op0. */
11652 mask = GEN_INT (-0x80000000);
11653 mask = gen_rtx_CONST_VECTOR (mode,
11654 gen_rtvec (4, mask, mask, mask, mask));
11655 mask = force_reg (mode, mask);
11656 t2 = gen_reg_rtx (mode);
11657 emit_insn (gen_andv4si3 (t2, cop0, mask));
11659 /* XOR it back into the result of the subtraction. This results
11660 in the sign bit set iff we saw unsigned underflow. */
11661 x = gen_reg_rtx (mode);
11662 emit_insn (gen_xorv4si3 (x, t1, t2));
11670 /* Perform a parallel unsigned saturating subtraction. */
11671 x = gen_reg_rtx (mode);
11672 emit_insn (gen_rtx_SET (VOIDmode, x,
11673 gen_rtx_US_MINUS (mode, cop0, cop1)));
11680 gcc_unreachable ();
11684 cop1 = CONST0_RTX (mode);
11687 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11688 operands[1+negate], operands[2-negate]);
11690 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11691 operands[2-negate]);
11695 /* Expand conditional increment or decrement using adb/sbb instructions.
11696 The default case using setcc followed by the conditional move can be
11697 done by generic code. */
11699 ix86_expand_int_addcc (rtx operands[])
11701 enum rtx_code code = GET_CODE (operands[1]);
11703 rtx val = const0_rtx;
11704 bool fpcmp = false;
11705 enum machine_mode mode = GET_MODE (operands[0]);
11707 if (operands[3] != const1_rtx
11708 && operands[3] != constm1_rtx)
11710 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11711 ix86_compare_op1, &compare_op))
11713 code = GET_CODE (compare_op);
11715 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11716 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11719 code = ix86_fp_compare_code_to_integer (code);
11726 PUT_CODE (compare_op,
11727 reverse_condition_maybe_unordered
11728 (GET_CODE (compare_op)));
11730 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11732 PUT_MODE (compare_op, mode);
11734 /* Construct either adc or sbb insn. */
11735 if ((code == LTU) == (operands[3] == constm1_rtx))
11737 switch (GET_MODE (operands[0]))
11740 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11743 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11746 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11749 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11752 gcc_unreachable ();
11757 switch (GET_MODE (operands[0]))
11760 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11763 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11766 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11769 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11772 gcc_unreachable ();
11775 return 1; /* DONE */
11779 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11780 works for floating pointer parameters and nonoffsetable memories.
11781 For pushes, it returns just stack offsets; the values will be saved
11782 in the right order. Maximally three parts are generated. */
11785 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11790 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11792 size = (GET_MODE_SIZE (mode) + 4) / 8;
11794 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11795 gcc_assert (size >= 2 && size <= 3);
11797 /* Optimize constant pool reference to immediates. This is used by fp
11798 moves, that force all constants to memory to allow combining. */
11799 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11801 rtx tmp = maybe_get_pool_constant (operand);
11806 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11808 /* The only non-offsetable memories we handle are pushes. */
11809 int ok = push_operand (operand, VOIDmode);
11813 operand = copy_rtx (operand);
11814 PUT_MODE (operand, Pmode);
11815 parts[0] = parts[1] = parts[2] = operand;
11819 if (GET_CODE (operand) == CONST_VECTOR)
11821 enum machine_mode imode = int_mode_for_mode (mode);
11822 /* Caution: if we looked through a constant pool memory above,
11823 the operand may actually have a different mode now. That's
11824 ok, since we want to pun this all the way back to an integer. */
11825 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11826 gcc_assert (operand != NULL);
11832 if (mode == DImode)
11833 split_di (&operand, 1, &parts[0], &parts[1]);
11836 if (REG_P (operand))
11838 gcc_assert (reload_completed);
11839 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11840 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11842 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11844 else if (offsettable_memref_p (operand))
11846 operand = adjust_address (operand, SImode, 0);
11847 parts[0] = operand;
11848 parts[1] = adjust_address (operand, SImode, 4);
11850 parts[2] = adjust_address (operand, SImode, 8);
11852 else if (GET_CODE (operand) == CONST_DOUBLE)
11857 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11861 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11862 parts[2] = gen_int_mode (l[2], SImode);
11865 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11868 gcc_unreachable ();
11870 parts[1] = gen_int_mode (l[1], SImode);
11871 parts[0] = gen_int_mode (l[0], SImode);
11874 gcc_unreachable ();
11879 if (mode == TImode)
11880 split_ti (&operand, 1, &parts[0], &parts[1]);
11881 if (mode == XFmode || mode == TFmode)
11883 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11884 if (REG_P (operand))
11886 gcc_assert (reload_completed);
11887 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11888 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11890 else if (offsettable_memref_p (operand))
11892 operand = adjust_address (operand, DImode, 0);
11893 parts[0] = operand;
11894 parts[1] = adjust_address (operand, upper_mode, 8);
11896 else if (GET_CODE (operand) == CONST_DOUBLE)
11901 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11902 real_to_target (l, &r, mode);
11904 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11905 if (HOST_BITS_PER_WIDE_INT >= 64)
11908 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11909 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11912 parts[0] = immed_double_const (l[0], l[1], DImode);
11914 if (upper_mode == SImode)
11915 parts[1] = gen_int_mode (l[2], SImode);
11916 else if (HOST_BITS_PER_WIDE_INT >= 64)
11919 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11920 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11923 parts[1] = immed_double_const (l[2], l[3], DImode);
11926 gcc_unreachable ();
11933 /* Emit insns to perform a move or push of DI, DF, and XF values.
11934 Return false when normal moves are needed; true when all required
11935 insns have been emitted. Operands 2-4 contain the input values
11936 int the correct order; operands 5-7 contain the output values. */
11939 ix86_split_long_move (rtx operands[])
11944 int collisions = 0;
11945 enum machine_mode mode = GET_MODE (operands[0]);
11947 /* The DFmode expanders may ask us to move double.
11948 For 64bit target this is single move. By hiding the fact
11949 here we simplify i386.md splitters. */
11950 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11952 /* Optimize constant pool reference to immediates. This is used by
11953 fp moves, that force all constants to memory to allow combining. */
11955 if (GET_CODE (operands[1]) == MEM
11956 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11957 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11958 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11959 if (push_operand (operands[0], VOIDmode))
11961 operands[0] = copy_rtx (operands[0]);
11962 PUT_MODE (operands[0], Pmode);
11965 operands[0] = gen_lowpart (DImode, operands[0]);
11966 operands[1] = gen_lowpart (DImode, operands[1]);
11967 emit_move_insn (operands[0], operands[1]);
11971 /* The only non-offsettable memory we handle is push. */
11972 if (push_operand (operands[0], VOIDmode))
11975 gcc_assert (GET_CODE (operands[0]) != MEM
11976 || offsettable_memref_p (operands[0]));
11978 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11979 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11981 /* When emitting push, take care for source operands on the stack. */
11982 if (push && GET_CODE (operands[1]) == MEM
11983 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11986 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11987 XEXP (part[1][2], 0));
11988 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11989 XEXP (part[1][1], 0));
11992 /* We need to do copy in the right order in case an address register
11993 of the source overlaps the destination. */
11994 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11996 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11998 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12001 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12004 /* Collision in the middle part can be handled by reordering. */
12005 if (collisions == 1 && nparts == 3
12006 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12009 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12010 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12013 /* If there are more collisions, we can't handle it by reordering.
12014 Do an lea to the last part and use only one colliding move. */
12015 else if (collisions > 1)
12021 base = part[0][nparts - 1];
12023 /* Handle the case when the last part isn't valid for lea.
12024 Happens in 64-bit mode storing the 12-byte XFmode. */
12025 if (GET_MODE (base) != Pmode)
12026 base = gen_rtx_REG (Pmode, REGNO (base));
12028 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12029 part[1][0] = replace_equiv_address (part[1][0], base);
12030 part[1][1] = replace_equiv_address (part[1][1],
12031 plus_constant (base, UNITS_PER_WORD));
12033 part[1][2] = replace_equiv_address (part[1][2],
12034 plus_constant (base, 8));
12044 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12045 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12046 emit_move_insn (part[0][2], part[1][2]);
12051 /* In 64bit mode we don't have 32bit push available. In case this is
12052 register, it is OK - we will just use larger counterpart. We also
12053 retype memory - these comes from attempt to avoid REX prefix on
12054 moving of second half of TFmode value. */
12055 if (GET_MODE (part[1][1]) == SImode)
12057 switch (GET_CODE (part[1][1]))
12060 part[1][1] = adjust_address (part[1][1], DImode, 0);
12064 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12068 gcc_unreachable ();
12071 if (GET_MODE (part[1][0]) == SImode)
12072 part[1][0] = part[1][1];
12075 emit_move_insn (part[0][1], part[1][1]);
12076 emit_move_insn (part[0][0], part[1][0]);
12080 /* Choose correct order to not overwrite the source before it is copied. */
12081 if ((REG_P (part[0][0])
12082 && REG_P (part[1][1])
12083 && (REGNO (part[0][0]) == REGNO (part[1][1])
12085 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12087 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12091 operands[2] = part[0][2];
12092 operands[3] = part[0][1];
12093 operands[4] = part[0][0];
12094 operands[5] = part[1][2];
12095 operands[6] = part[1][1];
12096 operands[7] = part[1][0];
12100 operands[2] = part[0][1];
12101 operands[3] = part[0][0];
12102 operands[5] = part[1][1];
12103 operands[6] = part[1][0];
12110 operands[2] = part[0][0];
12111 operands[3] = part[0][1];
12112 operands[4] = part[0][2];
12113 operands[5] = part[1][0];
12114 operands[6] = part[1][1];
12115 operands[7] = part[1][2];
12119 operands[2] = part[0][0];
12120 operands[3] = part[0][1];
12121 operands[5] = part[1][0];
12122 operands[6] = part[1][1];
12126 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12129 if (GET_CODE (operands[5]) == CONST_INT
12130 && operands[5] != const0_rtx
12131 && REG_P (operands[2]))
12133 if (GET_CODE (operands[6]) == CONST_INT
12134 && INTVAL (operands[6]) == INTVAL (operands[5]))
12135 operands[6] = operands[2];
12138 && GET_CODE (operands[7]) == CONST_INT
12139 && INTVAL (operands[7]) == INTVAL (operands[5]))
12140 operands[7] = operands[2];
12144 && GET_CODE (operands[6]) == CONST_INT
12145 && operands[6] != const0_rtx
12146 && REG_P (operands[3])
12147 && GET_CODE (operands[7]) == CONST_INT
12148 && INTVAL (operands[7]) == INTVAL (operands[6]))
12149 operands[7] = operands[3];
12152 emit_move_insn (operands[2], operands[5]);
12153 emit_move_insn (operands[3], operands[6]);
12155 emit_move_insn (operands[4], operands[7]);
12160 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12161 left shift by a constant, either using a single shift or
12162 a sequence of add instructions. */
12165 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12169 emit_insn ((mode == DImode
12171 : gen_adddi3) (operand, operand, operand));
12173 else if (!optimize_size
12174 && count * ix86_cost->add <= ix86_cost->shift_const)
12177 for (i=0; i<count; i++)
12179 emit_insn ((mode == DImode
12181 : gen_adddi3) (operand, operand, operand));
12185 emit_insn ((mode == DImode
12187 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12191 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12193 rtx low[2], high[2];
12195 const int single_width = mode == DImode ? 32 : 64;
12197 if (GET_CODE (operands[2]) == CONST_INT)
12199 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12200 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12202 if (count >= single_width)
12204 emit_move_insn (high[0], low[1]);
12205 emit_move_insn (low[0], const0_rtx);
12207 if (count > single_width)
12208 ix86_expand_ashl_const (high[0], count - single_width, mode);
12212 if (!rtx_equal_p (operands[0], operands[1]))
12213 emit_move_insn (operands[0], operands[1]);
12214 emit_insn ((mode == DImode
12216 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12217 ix86_expand_ashl_const (low[0], count, mode);
12222 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12224 if (operands[1] == const1_rtx)
12226 /* Assuming we've chosen a QImode capable registers, then 1 << N
12227 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12228 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12230 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12232 ix86_expand_clear (low[0]);
12233 ix86_expand_clear (high[0]);
12234 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12236 d = gen_lowpart (QImode, low[0]);
12237 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12238 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12239 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12241 d = gen_lowpart (QImode, high[0]);
12242 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12243 s = gen_rtx_NE (QImode, flags, const0_rtx);
12244 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12247 /* Otherwise, we can get the same results by manually performing
12248 a bit extract operation on bit 5/6, and then performing the two
12249 shifts. The two methods of getting 0/1 into low/high are exactly
12250 the same size. Avoiding the shift in the bit extract case helps
12251 pentium4 a bit; no one else seems to care much either way. */
12256 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12257 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12259 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12260 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12262 emit_insn ((mode == DImode
12264 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12265 emit_insn ((mode == DImode
12267 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12268 emit_move_insn (low[0], high[0]);
12269 emit_insn ((mode == DImode
12271 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12274 emit_insn ((mode == DImode
12276 : gen_ashldi3) (low[0], low[0], operands[2]));
12277 emit_insn ((mode == DImode
12279 : gen_ashldi3) (high[0], high[0], operands[2]));
12283 if (operands[1] == constm1_rtx)
12285 /* For -1 << N, we can avoid the shld instruction, because we
12286 know that we're shifting 0...31/63 ones into a -1. */
12287 emit_move_insn (low[0], constm1_rtx);
12289 emit_move_insn (high[0], low[0]);
12291 emit_move_insn (high[0], constm1_rtx);
12295 if (!rtx_equal_p (operands[0], operands[1]))
12296 emit_move_insn (operands[0], operands[1]);
12298 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12299 emit_insn ((mode == DImode
12301 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12304 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12306 if (TARGET_CMOVE && scratch)
12308 ix86_expand_clear (scratch);
12309 emit_insn ((mode == DImode
12310 ? gen_x86_shift_adj_1
12311 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12314 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12318 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12320 rtx low[2], high[2];
12322 const int single_width = mode == DImode ? 32 : 64;
12324 if (GET_CODE (operands[2]) == CONST_INT)
12326 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12327 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12329 if (count == single_width * 2 - 1)
12331 emit_move_insn (high[0], high[1]);
12332 emit_insn ((mode == DImode
12334 : gen_ashrdi3) (high[0], high[0],
12335 GEN_INT (single_width - 1)));
12336 emit_move_insn (low[0], high[0]);
12339 else if (count >= single_width)
12341 emit_move_insn (low[0], high[1]);
12342 emit_move_insn (high[0], low[0]);
12343 emit_insn ((mode == DImode
12345 : gen_ashrdi3) (high[0], high[0],
12346 GEN_INT (single_width - 1)));
12347 if (count > single_width)
12348 emit_insn ((mode == DImode
12350 : gen_ashrdi3) (low[0], low[0],
12351 GEN_INT (count - single_width)));
12355 if (!rtx_equal_p (operands[0], operands[1]))
12356 emit_move_insn (operands[0], operands[1]);
12357 emit_insn ((mode == DImode
12359 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12360 emit_insn ((mode == DImode
12362 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12367 if (!rtx_equal_p (operands[0], operands[1]))
12368 emit_move_insn (operands[0], operands[1]);
12370 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12372 emit_insn ((mode == DImode
12374 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12375 emit_insn ((mode == DImode
12377 : gen_ashrdi3) (high[0], high[0], operands[2]));
12379 if (TARGET_CMOVE && scratch)
12381 emit_move_insn (scratch, high[0]);
12382 emit_insn ((mode == DImode
12384 : gen_ashrdi3) (scratch, scratch,
12385 GEN_INT (single_width - 1)));
12386 emit_insn ((mode == DImode
12387 ? gen_x86_shift_adj_1
12388 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12392 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12397 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12399 rtx low[2], high[2];
12401 const int single_width = mode == DImode ? 32 : 64;
12403 if (GET_CODE (operands[2]) == CONST_INT)
12405 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12406 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12408 if (count >= single_width)
12410 emit_move_insn (low[0], high[1]);
12411 ix86_expand_clear (high[0]);
12413 if (count > single_width)
12414 emit_insn ((mode == DImode
12416 : gen_lshrdi3) (low[0], low[0],
12417 GEN_INT (count - single_width)));
12421 if (!rtx_equal_p (operands[0], operands[1]))
12422 emit_move_insn (operands[0], operands[1]);
12423 emit_insn ((mode == DImode
12425 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12426 emit_insn ((mode == DImode
12428 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12433 if (!rtx_equal_p (operands[0], operands[1]))
12434 emit_move_insn (operands[0], operands[1]);
12436 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12438 emit_insn ((mode == DImode
12440 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12441 emit_insn ((mode == DImode
12443 : gen_lshrdi3) (high[0], high[0], operands[2]));
12445 /* Heh. By reversing the arguments, we can reuse this pattern. */
12446 if (TARGET_CMOVE && scratch)
12448 ix86_expand_clear (scratch);
12449 emit_insn ((mode == DImode
12450 ? gen_x86_shift_adj_1
12451 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12455 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12459 /* Helper function for the string operations below. Dest VARIABLE whether
12460 it is aligned to VALUE bytes. If true, jump to the label. */
12462 ix86_expand_aligntest (rtx variable, int value)
12464 rtx label = gen_label_rtx ();
12465 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12466 if (GET_MODE (variable) == DImode)
12467 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12469 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12470 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12475 /* Adjust COUNTER by the VALUE. */
12477 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12479 if (GET_MODE (countreg) == DImode)
12480 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12482 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12485 /* Zero extend possibly SImode EXP to Pmode register. */
12487 ix86_zero_extend_to_Pmode (rtx exp)
12490 if (GET_MODE (exp) == VOIDmode)
12491 return force_reg (Pmode, exp);
12492 if (GET_MODE (exp) == Pmode)
12493 return copy_to_mode_reg (Pmode, exp);
12494 r = gen_reg_rtx (Pmode);
12495 emit_insn (gen_zero_extendsidi2 (r, exp));
12499 /* Expand string move (memcpy) operation. Use i386 string operations when
12500 profitable. expand_clrmem contains similar code. */
12502 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12504 rtx srcreg, destreg, countreg, srcexp, destexp;
12505 enum machine_mode counter_mode;
12506 HOST_WIDE_INT align = 0;
12507 unsigned HOST_WIDE_INT count = 0;
12509 if (GET_CODE (align_exp) == CONST_INT)
12510 align = INTVAL (align_exp);
12512 /* Can't use any of this if the user has appropriated esi or edi. */
12513 if (global_regs[4] || global_regs[5])
12516 /* This simple hack avoids all inlining code and simplifies code below. */
12517 if (!TARGET_ALIGN_STRINGOPS)
12520 if (GET_CODE (count_exp) == CONST_INT)
12522 count = INTVAL (count_exp);
12523 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12527 /* Figure out proper mode for counter. For 32bits it is always SImode,
12528 for 64bits use SImode when possible, otherwise DImode.
12529 Set count to number of bytes copied when known at compile time. */
12531 || GET_MODE (count_exp) == SImode
12532 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12533 counter_mode = SImode;
12535 counter_mode = DImode;
12537 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12539 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12540 if (destreg != XEXP (dst, 0))
12541 dst = replace_equiv_address_nv (dst, destreg);
12542 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12543 if (srcreg != XEXP (src, 0))
12544 src = replace_equiv_address_nv (src, srcreg);
12546 /* When optimizing for size emit simple rep ; movsb instruction for
12547 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12548 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12549 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12550 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12551 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12552 known to be zero or not. The rep; movsb sequence causes higher
12553 register pressure though, so take that into account. */
12555 if ((!optimize || optimize_size)
12560 || (count & 3) + count / 4 > 6))))
12562 emit_insn (gen_cld ());
12563 countreg = ix86_zero_extend_to_Pmode (count_exp);
12564 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12565 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12566 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12570 /* For constant aligned (or small unaligned) copies use rep movsl
12571 followed by code copying the rest. For PentiumPro ensure 8 byte
12572 alignment to allow rep movsl acceleration. */
12574 else if (count != 0
12576 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12577 || optimize_size || count < (unsigned int) 64))
12579 unsigned HOST_WIDE_INT offset = 0;
12580 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12581 rtx srcmem, dstmem;
12583 emit_insn (gen_cld ());
12584 if (count & ~(size - 1))
12586 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12588 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12590 while (offset < (count & ~(size - 1)))
12592 srcmem = adjust_automodify_address_nv (src, movs_mode,
12594 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12596 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12602 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12603 & (TARGET_64BIT ? -1 : 0x3fffffff));
12604 countreg = copy_to_mode_reg (counter_mode, countreg);
12605 countreg = ix86_zero_extend_to_Pmode (countreg);
12607 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12608 GEN_INT (size == 4 ? 2 : 3));
12609 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12610 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12612 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12613 countreg, destexp, srcexp));
12614 offset = count & ~(size - 1);
12617 if (size == 8 && (count & 0x04))
12619 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12621 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12623 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12628 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12630 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12632 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12637 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12639 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12641 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12644 /* The generic code based on the glibc implementation:
12645 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12646 allowing accelerated copying there)
12647 - copy the data using rep movsl
12648 - copy the rest. */
12653 rtx srcmem, dstmem;
12654 int desired_alignment = (TARGET_PENTIUMPRO
12655 && (count == 0 || count >= (unsigned int) 260)
12656 ? 8 : UNITS_PER_WORD);
12657 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12658 dst = change_address (dst, BLKmode, destreg);
12659 src = change_address (src, BLKmode, srcreg);
12661 /* In case we don't know anything about the alignment, default to
12662 library version, since it is usually equally fast and result in
12665 Also emit call when we know that the count is large and call overhead
12666 will not be important. */
12667 if (!TARGET_INLINE_ALL_STRINGOPS
12668 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12671 if (TARGET_SINGLE_STRINGOP)
12672 emit_insn (gen_cld ());
12674 countreg2 = gen_reg_rtx (Pmode);
12675 countreg = copy_to_mode_reg (counter_mode, count_exp);
12677 /* We don't use loops to align destination and to copy parts smaller
12678 than 4 bytes, because gcc is able to optimize such code better (in
12679 the case the destination or the count really is aligned, gcc is often
12680 able to predict the branches) and also it is friendlier to the
12681 hardware branch prediction.
12683 Using loops is beneficial for generic case, because we can
12684 handle small counts using the loops. Many CPUs (such as Athlon)
12685 have large REP prefix setup costs.
12687 This is quite costly. Maybe we can revisit this decision later or
12688 add some customizability to this code. */
12690 if (count == 0 && align < desired_alignment)
12692 label = gen_label_rtx ();
12693 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12694 LEU, 0, counter_mode, 1, label);
12698 rtx label = ix86_expand_aligntest (destreg, 1);
12699 srcmem = change_address (src, QImode, srcreg);
12700 dstmem = change_address (dst, QImode, destreg);
12701 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12702 ix86_adjust_counter (countreg, 1);
12703 emit_label (label);
12704 LABEL_NUSES (label) = 1;
12708 rtx label = ix86_expand_aligntest (destreg, 2);
12709 srcmem = change_address (src, HImode, srcreg);
12710 dstmem = change_address (dst, HImode, destreg);
12711 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12712 ix86_adjust_counter (countreg, 2);
12713 emit_label (label);
12714 LABEL_NUSES (label) = 1;
12716 if (align <= 4 && desired_alignment > 4)
12718 rtx label = ix86_expand_aligntest (destreg, 4);
12719 srcmem = change_address (src, SImode, srcreg);
12720 dstmem = change_address (dst, SImode, destreg);
12721 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12722 ix86_adjust_counter (countreg, 4);
12723 emit_label (label);
12724 LABEL_NUSES (label) = 1;
12727 if (label && desired_alignment > 4 && !TARGET_64BIT)
12729 emit_label (label);
12730 LABEL_NUSES (label) = 1;
12733 if (!TARGET_SINGLE_STRINGOP)
12734 emit_insn (gen_cld ());
12737 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12739 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12743 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12744 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12746 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12747 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12748 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12749 countreg2, destexp, srcexp));
12753 emit_label (label);
12754 LABEL_NUSES (label) = 1;
12756 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12758 srcmem = change_address (src, SImode, srcreg);
12759 dstmem = change_address (dst, SImode, destreg);
12760 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12762 if ((align <= 4 || count == 0) && TARGET_64BIT)
12764 rtx label = ix86_expand_aligntest (countreg, 4);
12765 srcmem = change_address (src, SImode, srcreg);
12766 dstmem = change_address (dst, SImode, destreg);
12767 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12768 emit_label (label);
12769 LABEL_NUSES (label) = 1;
12771 if (align > 2 && count != 0 && (count & 2))
12773 srcmem = change_address (src, HImode, srcreg);
12774 dstmem = change_address (dst, HImode, destreg);
12775 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12777 if (align <= 2 || count == 0)
12779 rtx label = ix86_expand_aligntest (countreg, 2);
12780 srcmem = change_address (src, HImode, srcreg);
12781 dstmem = change_address (dst, HImode, destreg);
12782 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12783 emit_label (label);
12784 LABEL_NUSES (label) = 1;
12786 if (align > 1 && count != 0 && (count & 1))
12788 srcmem = change_address (src, QImode, srcreg);
12789 dstmem = change_address (dst, QImode, destreg);
12790 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12792 if (align <= 1 || count == 0)
12794 rtx label = ix86_expand_aligntest (countreg, 1);
12795 srcmem = change_address (src, QImode, srcreg);
12796 dstmem = change_address (dst, QImode, destreg);
12797 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12798 emit_label (label);
12799 LABEL_NUSES (label) = 1;
12806 /* Expand string clear operation (bzero). Use i386 string operations when
12807 profitable. expand_movmem contains similar code. */
12809 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12811 rtx destreg, zeroreg, countreg, destexp;
12812 enum machine_mode counter_mode;
12813 HOST_WIDE_INT align = 0;
12814 unsigned HOST_WIDE_INT count = 0;
12816 if (GET_CODE (align_exp) == CONST_INT)
12817 align = INTVAL (align_exp);
12819 /* Can't use any of this if the user has appropriated esi. */
12820 if (global_regs[4])
12823 /* This simple hack avoids all inlining code and simplifies code below. */
12824 if (!TARGET_ALIGN_STRINGOPS)
12827 if (GET_CODE (count_exp) == CONST_INT)
12829 count = INTVAL (count_exp);
12830 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12833 /* Figure out proper mode for counter. For 32bits it is always SImode,
12834 for 64bits use SImode when possible, otherwise DImode.
12835 Set count to number of bytes copied when known at compile time. */
12837 || GET_MODE (count_exp) == SImode
12838 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12839 counter_mode = SImode;
12841 counter_mode = DImode;
12843 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12844 if (destreg != XEXP (dst, 0))
12845 dst = replace_equiv_address_nv (dst, destreg);
12848 /* When optimizing for size emit simple rep ; movsb instruction for
12849 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12850 sequence is 7 bytes long, so if optimizing for size and count is
12851 small enough that some stosl, stosw and stosb instructions without
12852 rep are shorter, fall back into the next if. */
12854 if ((!optimize || optimize_size)
12857 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12859 emit_insn (gen_cld ());
12861 countreg = ix86_zero_extend_to_Pmode (count_exp);
12862 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12863 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12864 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12866 else if (count != 0
12868 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12869 || optimize_size || count < (unsigned int) 64))
12871 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12872 unsigned HOST_WIDE_INT offset = 0;
12874 emit_insn (gen_cld ());
12876 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12877 if (count & ~(size - 1))
12879 unsigned HOST_WIDE_INT repcount;
12880 unsigned int max_nonrep;
12882 repcount = count >> (size == 4 ? 2 : 3);
12884 repcount &= 0x3fffffff;
12886 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12887 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12888 bytes. In both cases the latter seems to be faster for small
12890 max_nonrep = size == 4 ? 7 : 4;
12891 if (!optimize_size)
12894 case PROCESSOR_PENTIUM4:
12895 case PROCESSOR_NOCONA:
12902 if (repcount <= max_nonrep)
12903 while (repcount-- > 0)
12905 rtx mem = adjust_automodify_address_nv (dst,
12906 GET_MODE (zeroreg),
12908 emit_insn (gen_strset (destreg, mem, zeroreg));
12913 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12914 countreg = ix86_zero_extend_to_Pmode (countreg);
12915 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12916 GEN_INT (size == 4 ? 2 : 3));
12917 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12918 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12920 offset = count & ~(size - 1);
12923 if (size == 8 && (count & 0x04))
12925 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12927 emit_insn (gen_strset (destreg, mem,
12928 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12933 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12935 emit_insn (gen_strset (destreg, mem,
12936 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12941 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12943 emit_insn (gen_strset (destreg, mem,
12944 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12951 /* Compute desired alignment of the string operation. */
12952 int desired_alignment = (TARGET_PENTIUMPRO
12953 && (count == 0 || count >= (unsigned int) 260)
12954 ? 8 : UNITS_PER_WORD);
12956 /* In case we don't know anything about the alignment, default to
12957 library version, since it is usually equally fast and result in
12960 Also emit call when we know that the count is large and call overhead
12961 will not be important. */
12962 if (!TARGET_INLINE_ALL_STRINGOPS
12963 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12966 if (TARGET_SINGLE_STRINGOP)
12967 emit_insn (gen_cld ());
12969 countreg2 = gen_reg_rtx (Pmode);
12970 countreg = copy_to_mode_reg (counter_mode, count_exp);
12971 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12972 /* Get rid of MEM_OFFSET, it won't be accurate. */
12973 dst = change_address (dst, BLKmode, destreg);
12975 if (count == 0 && align < desired_alignment)
12977 label = gen_label_rtx ();
12978 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12979 LEU, 0, counter_mode, 1, label);
12983 rtx label = ix86_expand_aligntest (destreg, 1);
12984 emit_insn (gen_strset (destreg, dst,
12985 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12986 ix86_adjust_counter (countreg, 1);
12987 emit_label (label);
12988 LABEL_NUSES (label) = 1;
12992 rtx label = ix86_expand_aligntest (destreg, 2);
12993 emit_insn (gen_strset (destreg, dst,
12994 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12995 ix86_adjust_counter (countreg, 2);
12996 emit_label (label);
12997 LABEL_NUSES (label) = 1;
12999 if (align <= 4 && desired_alignment > 4)
13001 rtx label = ix86_expand_aligntest (destreg, 4);
13002 emit_insn (gen_strset (destreg, dst,
13004 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13006 ix86_adjust_counter (countreg, 4);
13007 emit_label (label);
13008 LABEL_NUSES (label) = 1;
13011 if (label && desired_alignment > 4 && !TARGET_64BIT)
13013 emit_label (label);
13014 LABEL_NUSES (label) = 1;
13018 if (!TARGET_SINGLE_STRINGOP)
13019 emit_insn (gen_cld ());
13022 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13024 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13028 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13029 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13031 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13032 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13036 emit_label (label);
13037 LABEL_NUSES (label) = 1;
13040 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13041 emit_insn (gen_strset (destreg, dst,
13042 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13043 if (TARGET_64BIT && (align <= 4 || count == 0))
13045 rtx label = ix86_expand_aligntest (countreg, 4);
13046 emit_insn (gen_strset (destreg, dst,
13047 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13048 emit_label (label);
13049 LABEL_NUSES (label) = 1;
13051 if (align > 2 && count != 0 && (count & 2))
13052 emit_insn (gen_strset (destreg, dst,
13053 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13054 if (align <= 2 || count == 0)
13056 rtx label = ix86_expand_aligntest (countreg, 2);
13057 emit_insn (gen_strset (destreg, dst,
13058 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13059 emit_label (label);
13060 LABEL_NUSES (label) = 1;
13062 if (align > 1 && count != 0 && (count & 1))
13063 emit_insn (gen_strset (destreg, dst,
13064 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13065 if (align <= 1 || count == 0)
13067 rtx label = ix86_expand_aligntest (countreg, 1);
13068 emit_insn (gen_strset (destreg, dst,
13069 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13070 emit_label (label);
13071 LABEL_NUSES (label) = 1;
13077 /* Expand strlen. */
13079 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13081 rtx addr, scratch1, scratch2, scratch3, scratch4;
13083 /* The generic case of strlen expander is long. Avoid it's
13084 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13086 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13087 && !TARGET_INLINE_ALL_STRINGOPS
13089 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13092 addr = force_reg (Pmode, XEXP (src, 0));
13093 scratch1 = gen_reg_rtx (Pmode);
13095 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13098 /* Well it seems that some optimizer does not combine a call like
13099 foo(strlen(bar), strlen(bar));
13100 when the move and the subtraction is done here. It does calculate
13101 the length just once when these instructions are done inside of
13102 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13103 often used and I use one fewer register for the lifetime of
13104 output_strlen_unroll() this is better. */
13106 emit_move_insn (out, addr);
13108 ix86_expand_strlensi_unroll_1 (out, src, align);
13110 /* strlensi_unroll_1 returns the address of the zero at the end of
13111 the string, like memchr(), so compute the length by subtracting
13112 the start address. */
13114 emit_insn (gen_subdi3 (out, out, addr));
13116 emit_insn (gen_subsi3 (out, out, addr));
13121 scratch2 = gen_reg_rtx (Pmode);
13122 scratch3 = gen_reg_rtx (Pmode);
13123 scratch4 = force_reg (Pmode, constm1_rtx);
13125 emit_move_insn (scratch3, addr);
13126 eoschar = force_reg (QImode, eoschar);
13128 emit_insn (gen_cld ());
13129 src = replace_equiv_address_nv (src, scratch3);
13131 /* If .md starts supporting :P, this can be done in .md. */
13132 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13133 scratch4), UNSPEC_SCAS);
13134 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13137 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13138 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13142 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13143 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13149 /* Expand the appropriate insns for doing strlen if not just doing
13152 out = result, initialized with the start address
13153 align_rtx = alignment of the address.
13154 scratch = scratch register, initialized with the startaddress when
13155 not aligned, otherwise undefined
13157 This is just the body. It needs the initializations mentioned above and
13158 some address computing at the end. These things are done in i386.md. */
13161 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13165 rtx align_2_label = NULL_RTX;
13166 rtx align_3_label = NULL_RTX;
13167 rtx align_4_label = gen_label_rtx ();
13168 rtx end_0_label = gen_label_rtx ();
13170 rtx tmpreg = gen_reg_rtx (SImode);
13171 rtx scratch = gen_reg_rtx (SImode);
13175 if (GET_CODE (align_rtx) == CONST_INT)
13176 align = INTVAL (align_rtx);
13178 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13180 /* Is there a known alignment and is it less than 4? */
13183 rtx scratch1 = gen_reg_rtx (Pmode);
13184 emit_move_insn (scratch1, out);
13185 /* Is there a known alignment and is it not 2? */
13188 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13189 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13191 /* Leave just the 3 lower bits. */
13192 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13193 NULL_RTX, 0, OPTAB_WIDEN);
13195 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13196 Pmode, 1, align_4_label);
13197 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13198 Pmode, 1, align_2_label);
13199 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13200 Pmode, 1, align_3_label);
13204 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13205 check if is aligned to 4 - byte. */
13207 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13208 NULL_RTX, 0, OPTAB_WIDEN);
13210 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13211 Pmode, 1, align_4_label);
13214 mem = change_address (src, QImode, out);
13216 /* Now compare the bytes. */
13218 /* Compare the first n unaligned byte on a byte per byte basis. */
13219 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13220 QImode, 1, end_0_label);
13222 /* Increment the address. */
13224 emit_insn (gen_adddi3 (out, out, const1_rtx));
13226 emit_insn (gen_addsi3 (out, out, const1_rtx));
13228 /* Not needed with an alignment of 2 */
13231 emit_label (align_2_label);
13233 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13237 emit_insn (gen_adddi3 (out, out, const1_rtx));
13239 emit_insn (gen_addsi3 (out, out, const1_rtx));
13241 emit_label (align_3_label);
13244 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13248 emit_insn (gen_adddi3 (out, out, const1_rtx));
13250 emit_insn (gen_addsi3 (out, out, const1_rtx));
13253 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13254 align this loop. It gives only huge programs, but does not help to
13256 emit_label (align_4_label);
13258 mem = change_address (src, SImode, out);
13259 emit_move_insn (scratch, mem);
13261 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13263 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13265 /* This formula yields a nonzero result iff one of the bytes is zero.
13266 This saves three branches inside loop and many cycles. */
13268 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13269 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13270 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13271 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13272 gen_int_mode (0x80808080, SImode)));
13273 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13278 rtx reg = gen_reg_rtx (SImode);
13279 rtx reg2 = gen_reg_rtx (Pmode);
13280 emit_move_insn (reg, tmpreg);
13281 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13283 /* If zero is not in the first two bytes, move two bytes forward. */
13284 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13285 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13286 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13287 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13288 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13291 /* Emit lea manually to avoid clobbering of flags. */
13292 emit_insn (gen_rtx_SET (SImode, reg2,
13293 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13295 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13296 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13297 emit_insn (gen_rtx_SET (VOIDmode, out,
13298 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13305 rtx end_2_label = gen_label_rtx ();
13306 /* Is zero in the first two bytes? */
13308 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13309 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13310 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13311 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13312 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13314 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13315 JUMP_LABEL (tmp) = end_2_label;
13317 /* Not in the first two. Move two bytes forward. */
13318 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13320 emit_insn (gen_adddi3 (out, out, const2_rtx));
13322 emit_insn (gen_addsi3 (out, out, const2_rtx));
13324 emit_label (end_2_label);
13328 /* Avoid branch in fixing the byte. */
13329 tmpreg = gen_lowpart (QImode, tmpreg);
13330 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13331 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13333 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13335 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13337 emit_label (end_0_label);
13341 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13342 rtx callarg2 ATTRIBUTE_UNUSED,
13343 rtx pop, int sibcall)
13345 rtx use = NULL, call;
13347 if (pop == const0_rtx)
13349 gcc_assert (!TARGET_64BIT || !pop);
13351 if (TARGET_MACHO && !TARGET_64BIT)
13354 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13355 fnaddr = machopic_indirect_call_target (fnaddr);
13360 /* Static functions and indirect calls don't need the pic register. */
13361 if (! TARGET_64BIT && flag_pic
13362 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13363 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13364 use_reg (&use, pic_offset_table_rtx);
13367 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13369 rtx al = gen_rtx_REG (QImode, 0);
13370 emit_move_insn (al, callarg2);
13371 use_reg (&use, al);
13374 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13376 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13377 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13379 if (sibcall && TARGET_64BIT
13380 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13383 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13384 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13385 emit_move_insn (fnaddr, addr);
13386 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13389 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13391 call = gen_rtx_SET (VOIDmode, retval, call);
13394 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13395 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13396 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13399 call = emit_call_insn (call);
13401 CALL_INSN_FUNCTION_USAGE (call) = use;
13405 /* Clear stack slot assignments remembered from previous functions.
13406 This is called from INIT_EXPANDERS once before RTL is emitted for each
13409 static struct machine_function *
13410 ix86_init_machine_status (void)
13412 struct machine_function *f;
13414 f = ggc_alloc_cleared (sizeof (struct machine_function));
13415 f->use_fast_prologue_epilogue_nregs = -1;
13416 f->tls_descriptor_call_expanded_p = 0;
13421 /* Return a MEM corresponding to a stack slot with mode MODE.
13422 Allocate a new slot if necessary.
13424 The RTL for a function can have several slots available: N is
13425 which slot to use. */
13428 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13430 struct stack_local_entry *s;
13432 gcc_assert (n < MAX_386_STACK_LOCALS);
13434 for (s = ix86_stack_locals; s; s = s->next)
13435 if (s->mode == mode && s->n == n)
13438 s = (struct stack_local_entry *)
13439 ggc_alloc (sizeof (struct stack_local_entry));
13442 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13444 s->next = ix86_stack_locals;
13445 ix86_stack_locals = s;
13449 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13451 static GTY(()) rtx ix86_tls_symbol;
13453 ix86_tls_get_addr (void)
13456 if (!ix86_tls_symbol)
13458 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13459 (TARGET_ANY_GNU_TLS
13461 ? "___tls_get_addr"
13462 : "__tls_get_addr");
13465 return ix86_tls_symbol;
13468 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13470 static GTY(()) rtx ix86_tls_module_base_symbol;
13472 ix86_tls_module_base (void)
13475 if (!ix86_tls_module_base_symbol)
13477 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13478 "_TLS_MODULE_BASE_");
13479 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13480 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13483 return ix86_tls_module_base_symbol;
13486 /* Calculate the length of the memory address in the instruction
13487 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13490 memory_address_length (rtx addr)
13492 struct ix86_address parts;
13493 rtx base, index, disp;
13497 if (GET_CODE (addr) == PRE_DEC
13498 || GET_CODE (addr) == POST_INC
13499 || GET_CODE (addr) == PRE_MODIFY
13500 || GET_CODE (addr) == POST_MODIFY)
13503 ok = ix86_decompose_address (addr, &parts);
13506 if (parts.base && GET_CODE (parts.base) == SUBREG)
13507 parts.base = SUBREG_REG (parts.base);
13508 if (parts.index && GET_CODE (parts.index) == SUBREG)
13509 parts.index = SUBREG_REG (parts.index);
13512 index = parts.index;
13517 - esp as the base always wants an index,
13518 - ebp as the base always wants a displacement. */
13520 /* Register Indirect. */
13521 if (base && !index && !disp)
13523 /* esp (for its index) and ebp (for its displacement) need
13524 the two-byte modrm form. */
13525 if (addr == stack_pointer_rtx
13526 || addr == arg_pointer_rtx
13527 || addr == frame_pointer_rtx
13528 || addr == hard_frame_pointer_rtx)
13532 /* Direct Addressing. */
13533 else if (disp && !base && !index)
13538 /* Find the length of the displacement constant. */
13541 if (base && satisfies_constraint_K (disp))
13546 /* ebp always wants a displacement. */
13547 else if (base == hard_frame_pointer_rtx)
13550 /* An index requires the two-byte modrm form.... */
13552 /* ...like esp, which always wants an index. */
13553 || base == stack_pointer_rtx
13554 || base == arg_pointer_rtx
13555 || base == frame_pointer_rtx)
13562 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13563 is set, expect that insn have 8bit immediate alternative. */
13565 ix86_attr_length_immediate_default (rtx insn, int shortform)
13569 extract_insn_cached (insn);
13570 for (i = recog_data.n_operands - 1; i >= 0; --i)
13571 if (CONSTANT_P (recog_data.operand[i]))
13574 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13578 switch (get_attr_mode (insn))
13589 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13594 fatal_insn ("unknown insn mode", insn);
13600 /* Compute default value for "length_address" attribute. */
13602 ix86_attr_length_address_default (rtx insn)
13606 if (get_attr_type (insn) == TYPE_LEA)
13608 rtx set = PATTERN (insn);
13610 if (GET_CODE (set) == PARALLEL)
13611 set = XVECEXP (set, 0, 0);
13613 gcc_assert (GET_CODE (set) == SET);
13615 return memory_address_length (SET_SRC (set));
13618 extract_insn_cached (insn);
13619 for (i = recog_data.n_operands - 1; i >= 0; --i)
13620 if (GET_CODE (recog_data.operand[i]) == MEM)
13622 return memory_address_length (XEXP (recog_data.operand[i], 0));
13628 /* Return the maximum number of instructions a cpu can issue. */
13631 ix86_issue_rate (void)
13635 case PROCESSOR_PENTIUM:
13639 case PROCESSOR_PENTIUMPRO:
13640 case PROCESSOR_PENTIUM4:
13641 case PROCESSOR_ATHLON:
13643 case PROCESSOR_NOCONA:
13644 case PROCESSOR_GENERIC32:
13645 case PROCESSOR_GENERIC64:
13653 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13654 by DEP_INSN and nothing set by DEP_INSN. */
13657 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13661 /* Simplify the test for uninteresting insns. */
13662 if (insn_type != TYPE_SETCC
13663 && insn_type != TYPE_ICMOV
13664 && insn_type != TYPE_FCMOV
13665 && insn_type != TYPE_IBR)
13668 if ((set = single_set (dep_insn)) != 0)
13670 set = SET_DEST (set);
13673 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13674 && XVECLEN (PATTERN (dep_insn), 0) == 2
13675 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13676 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13678 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13679 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13684 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13687 /* This test is true if the dependent insn reads the flags but
13688 not any other potentially set register. */
13689 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13692 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13698 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13699 address with operands set by DEP_INSN. */
13702 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13706 if (insn_type == TYPE_LEA
13709 addr = PATTERN (insn);
13711 if (GET_CODE (addr) == PARALLEL)
13712 addr = XVECEXP (addr, 0, 0);
13714 gcc_assert (GET_CODE (addr) == SET);
13716 addr = SET_SRC (addr);
13721 extract_insn_cached (insn);
13722 for (i = recog_data.n_operands - 1; i >= 0; --i)
13723 if (GET_CODE (recog_data.operand[i]) == MEM)
13725 addr = XEXP (recog_data.operand[i], 0);
13732 return modified_in_p (addr, dep_insn);
13736 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13738 enum attr_type insn_type, dep_insn_type;
13739 enum attr_memory memory;
13741 int dep_insn_code_number;
13743 /* Anti and output dependencies have zero cost on all CPUs. */
13744 if (REG_NOTE_KIND (link) != 0)
13747 dep_insn_code_number = recog_memoized (dep_insn);
13749 /* If we can't recognize the insns, we can't really do anything. */
13750 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13753 insn_type = get_attr_type (insn);
13754 dep_insn_type = get_attr_type (dep_insn);
13758 case PROCESSOR_PENTIUM:
13759 /* Address Generation Interlock adds a cycle of latency. */
13760 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13763 /* ??? Compares pair with jump/setcc. */
13764 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13767 /* Floating point stores require value to be ready one cycle earlier. */
13768 if (insn_type == TYPE_FMOV
13769 && get_attr_memory (insn) == MEMORY_STORE
13770 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13774 case PROCESSOR_PENTIUMPRO:
13775 memory = get_attr_memory (insn);
13777 /* INT->FP conversion is expensive. */
13778 if (get_attr_fp_int_src (dep_insn))
13781 /* There is one cycle extra latency between an FP op and a store. */
13782 if (insn_type == TYPE_FMOV
13783 && (set = single_set (dep_insn)) != NULL_RTX
13784 && (set2 = single_set (insn)) != NULL_RTX
13785 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13786 && GET_CODE (SET_DEST (set2)) == MEM)
13789 /* Show ability of reorder buffer to hide latency of load by executing
13790 in parallel with previous instruction in case
13791 previous instruction is not needed to compute the address. */
13792 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13793 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13795 /* Claim moves to take one cycle, as core can issue one load
13796 at time and the next load can start cycle later. */
13797 if (dep_insn_type == TYPE_IMOV
13798 || dep_insn_type == TYPE_FMOV)
13806 memory = get_attr_memory (insn);
13808 /* The esp dependency is resolved before the instruction is really
13810 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13811 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13814 /* INT->FP conversion is expensive. */
13815 if (get_attr_fp_int_src (dep_insn))
13818 /* Show ability of reorder buffer to hide latency of load by executing
13819 in parallel with previous instruction in case
13820 previous instruction is not needed to compute the address. */
13821 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13822 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13824 /* Claim moves to take one cycle, as core can issue one load
13825 at time and the next load can start cycle later. */
13826 if (dep_insn_type == TYPE_IMOV
13827 || dep_insn_type == TYPE_FMOV)
13836 case PROCESSOR_ATHLON:
13838 case PROCESSOR_GENERIC32:
13839 case PROCESSOR_GENERIC64:
13840 memory = get_attr_memory (insn);
13842 /* Show ability of reorder buffer to hide latency of load by executing
13843 in parallel with previous instruction in case
13844 previous instruction is not needed to compute the address. */
13845 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13846 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13848 enum attr_unit unit = get_attr_unit (insn);
13851 /* Because of the difference between the length of integer and
13852 floating unit pipeline preparation stages, the memory operands
13853 for floating point are cheaper.
13855 ??? For Athlon it the difference is most probably 2. */
13856 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13859 loadcost = TARGET_ATHLON ? 2 : 0;
13861 if (cost >= loadcost)
13874 /* How many alternative schedules to try. This should be as wide as the
13875 scheduling freedom in the DFA, but no wider. Making this value too
13876 large results extra work for the scheduler. */
13879 ia32_multipass_dfa_lookahead (void)
13881 if (ix86_tune == PROCESSOR_PENTIUM)
13884 if (ix86_tune == PROCESSOR_PENTIUMPRO
13885 || ix86_tune == PROCESSOR_K6)
13893 /* Compute the alignment given to a constant that is being placed in memory.
13894 EXP is the constant and ALIGN is the alignment that the object would
13896 The value of this function is used instead of that alignment to align
13900 ix86_constant_alignment (tree exp, int align)
13902 if (TREE_CODE (exp) == REAL_CST)
13904 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13906 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13909 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13910 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13911 return BITS_PER_WORD;
13916 /* Compute the alignment for a static variable.
13917 TYPE is the data type, and ALIGN is the alignment that
13918 the object would ordinarily have. The value of this function is used
13919 instead of that alignment to align the object. */
13922 ix86_data_alignment (tree type, int align)
13924 int max_align = optimize_size ? BITS_PER_WORD : 256;
13926 if (AGGREGATE_TYPE_P (type)
13927 && TYPE_SIZE (type)
13928 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13929 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13930 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13931 && align < max_align)
13934 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13935 to 16byte boundary. */
13938 if (AGGREGATE_TYPE_P (type)
13939 && TYPE_SIZE (type)
13940 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13941 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13942 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13946 if (TREE_CODE (type) == ARRAY_TYPE)
13948 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13950 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13953 else if (TREE_CODE (type) == COMPLEX_TYPE)
13956 if (TYPE_MODE (type) == DCmode && align < 64)
13958 if (TYPE_MODE (type) == XCmode && align < 128)
13961 else if ((TREE_CODE (type) == RECORD_TYPE
13962 || TREE_CODE (type) == UNION_TYPE
13963 || TREE_CODE (type) == QUAL_UNION_TYPE)
13964 && TYPE_FIELDS (type))
13966 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13968 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13971 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13972 || TREE_CODE (type) == INTEGER_TYPE)
13974 if (TYPE_MODE (type) == DFmode && align < 64)
13976 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13983 /* Compute the alignment for a local variable.
13984 TYPE is the data type, and ALIGN is the alignment that
13985 the object would ordinarily have. The value of this macro is used
13986 instead of that alignment to align the object. */
13989 ix86_local_alignment (tree type, int align)
13991 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13992 to 16byte boundary. */
13995 if (AGGREGATE_TYPE_P (type)
13996 && TYPE_SIZE (type)
13997 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13998 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13999 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14002 if (TREE_CODE (type) == ARRAY_TYPE)
14004 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14006 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14009 else if (TREE_CODE (type) == COMPLEX_TYPE)
14011 if (TYPE_MODE (type) == DCmode && align < 64)
14013 if (TYPE_MODE (type) == XCmode && align < 128)
14016 else if ((TREE_CODE (type) == RECORD_TYPE
14017 || TREE_CODE (type) == UNION_TYPE
14018 || TREE_CODE (type) == QUAL_UNION_TYPE)
14019 && TYPE_FIELDS (type))
14021 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14023 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14026 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14027 || TREE_CODE (type) == INTEGER_TYPE)
14030 if (TYPE_MODE (type) == DFmode && align < 64)
14032 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14038 /* Emit RTL insns to initialize the variable parts of a trampoline.
14039 FNADDR is an RTX for the address of the function's pure code.
14040 CXT is an RTX for the static chain value for the function. */
14042 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14046 /* Compute offset from the end of the jmp to the target function. */
14047 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14048 plus_constant (tramp, 10),
14049 NULL_RTX, 1, OPTAB_DIRECT);
14050 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14051 gen_int_mode (0xb9, QImode));
14052 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14053 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14054 gen_int_mode (0xe9, QImode));
14055 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14060 /* Try to load address using shorter movl instead of movabs.
14061 We may want to support movq for kernel mode, but kernel does not use
14062 trampolines at the moment. */
14063 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14065 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14066 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14067 gen_int_mode (0xbb41, HImode));
14068 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14069 gen_lowpart (SImode, fnaddr));
14074 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14075 gen_int_mode (0xbb49, HImode));
14076 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14080 /* Load static chain using movabs to r10. */
14081 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14082 gen_int_mode (0xba49, HImode));
14083 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14086 /* Jump to the r11 */
14087 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14088 gen_int_mode (0xff49, HImode));
14089 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14090 gen_int_mode (0xe3, QImode));
14092 gcc_assert (offset <= TRAMPOLINE_SIZE);
14095 #ifdef ENABLE_EXECUTE_STACK
14096 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14097 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14101 /* Codes for all the SSE/MMX builtins. */
14104 IX86_BUILTIN_ADDPS,
14105 IX86_BUILTIN_ADDSS,
14106 IX86_BUILTIN_DIVPS,
14107 IX86_BUILTIN_DIVSS,
14108 IX86_BUILTIN_MULPS,
14109 IX86_BUILTIN_MULSS,
14110 IX86_BUILTIN_SUBPS,
14111 IX86_BUILTIN_SUBSS,
14113 IX86_BUILTIN_CMPEQPS,
14114 IX86_BUILTIN_CMPLTPS,
14115 IX86_BUILTIN_CMPLEPS,
14116 IX86_BUILTIN_CMPGTPS,
14117 IX86_BUILTIN_CMPGEPS,
14118 IX86_BUILTIN_CMPNEQPS,
14119 IX86_BUILTIN_CMPNLTPS,
14120 IX86_BUILTIN_CMPNLEPS,
14121 IX86_BUILTIN_CMPNGTPS,
14122 IX86_BUILTIN_CMPNGEPS,
14123 IX86_BUILTIN_CMPORDPS,
14124 IX86_BUILTIN_CMPUNORDPS,
14125 IX86_BUILTIN_CMPEQSS,
14126 IX86_BUILTIN_CMPLTSS,
14127 IX86_BUILTIN_CMPLESS,
14128 IX86_BUILTIN_CMPNEQSS,
14129 IX86_BUILTIN_CMPNLTSS,
14130 IX86_BUILTIN_CMPNLESS,
14131 IX86_BUILTIN_CMPNGTSS,
14132 IX86_BUILTIN_CMPNGESS,
14133 IX86_BUILTIN_CMPORDSS,
14134 IX86_BUILTIN_CMPUNORDSS,
14136 IX86_BUILTIN_COMIEQSS,
14137 IX86_BUILTIN_COMILTSS,
14138 IX86_BUILTIN_COMILESS,
14139 IX86_BUILTIN_COMIGTSS,
14140 IX86_BUILTIN_COMIGESS,
14141 IX86_BUILTIN_COMINEQSS,
14142 IX86_BUILTIN_UCOMIEQSS,
14143 IX86_BUILTIN_UCOMILTSS,
14144 IX86_BUILTIN_UCOMILESS,
14145 IX86_BUILTIN_UCOMIGTSS,
14146 IX86_BUILTIN_UCOMIGESS,
14147 IX86_BUILTIN_UCOMINEQSS,
14149 IX86_BUILTIN_CVTPI2PS,
14150 IX86_BUILTIN_CVTPS2PI,
14151 IX86_BUILTIN_CVTSI2SS,
14152 IX86_BUILTIN_CVTSI642SS,
14153 IX86_BUILTIN_CVTSS2SI,
14154 IX86_BUILTIN_CVTSS2SI64,
14155 IX86_BUILTIN_CVTTPS2PI,
14156 IX86_BUILTIN_CVTTSS2SI,
14157 IX86_BUILTIN_CVTTSS2SI64,
14159 IX86_BUILTIN_MAXPS,
14160 IX86_BUILTIN_MAXSS,
14161 IX86_BUILTIN_MINPS,
14162 IX86_BUILTIN_MINSS,
14164 IX86_BUILTIN_LOADUPS,
14165 IX86_BUILTIN_STOREUPS,
14166 IX86_BUILTIN_MOVSS,
14168 IX86_BUILTIN_MOVHLPS,
14169 IX86_BUILTIN_MOVLHPS,
14170 IX86_BUILTIN_LOADHPS,
14171 IX86_BUILTIN_LOADLPS,
14172 IX86_BUILTIN_STOREHPS,
14173 IX86_BUILTIN_STORELPS,
14175 IX86_BUILTIN_MASKMOVQ,
14176 IX86_BUILTIN_MOVMSKPS,
14177 IX86_BUILTIN_PMOVMSKB,
14179 IX86_BUILTIN_MOVNTPS,
14180 IX86_BUILTIN_MOVNTQ,
14182 IX86_BUILTIN_LOADDQU,
14183 IX86_BUILTIN_STOREDQU,
14185 IX86_BUILTIN_PACKSSWB,
14186 IX86_BUILTIN_PACKSSDW,
14187 IX86_BUILTIN_PACKUSWB,
14189 IX86_BUILTIN_PADDB,
14190 IX86_BUILTIN_PADDW,
14191 IX86_BUILTIN_PADDD,
14192 IX86_BUILTIN_PADDQ,
14193 IX86_BUILTIN_PADDSB,
14194 IX86_BUILTIN_PADDSW,
14195 IX86_BUILTIN_PADDUSB,
14196 IX86_BUILTIN_PADDUSW,
14197 IX86_BUILTIN_PSUBB,
14198 IX86_BUILTIN_PSUBW,
14199 IX86_BUILTIN_PSUBD,
14200 IX86_BUILTIN_PSUBQ,
14201 IX86_BUILTIN_PSUBSB,
14202 IX86_BUILTIN_PSUBSW,
14203 IX86_BUILTIN_PSUBUSB,
14204 IX86_BUILTIN_PSUBUSW,
14207 IX86_BUILTIN_PANDN,
14211 IX86_BUILTIN_PAVGB,
14212 IX86_BUILTIN_PAVGW,
14214 IX86_BUILTIN_PCMPEQB,
14215 IX86_BUILTIN_PCMPEQW,
14216 IX86_BUILTIN_PCMPEQD,
14217 IX86_BUILTIN_PCMPGTB,
14218 IX86_BUILTIN_PCMPGTW,
14219 IX86_BUILTIN_PCMPGTD,
14221 IX86_BUILTIN_PMADDWD,
14223 IX86_BUILTIN_PMAXSW,
14224 IX86_BUILTIN_PMAXUB,
14225 IX86_BUILTIN_PMINSW,
14226 IX86_BUILTIN_PMINUB,
14228 IX86_BUILTIN_PMULHUW,
14229 IX86_BUILTIN_PMULHW,
14230 IX86_BUILTIN_PMULLW,
14232 IX86_BUILTIN_PSADBW,
14233 IX86_BUILTIN_PSHUFW,
14235 IX86_BUILTIN_PSLLW,
14236 IX86_BUILTIN_PSLLD,
14237 IX86_BUILTIN_PSLLQ,
14238 IX86_BUILTIN_PSRAW,
14239 IX86_BUILTIN_PSRAD,
14240 IX86_BUILTIN_PSRLW,
14241 IX86_BUILTIN_PSRLD,
14242 IX86_BUILTIN_PSRLQ,
14243 IX86_BUILTIN_PSLLWI,
14244 IX86_BUILTIN_PSLLDI,
14245 IX86_BUILTIN_PSLLQI,
14246 IX86_BUILTIN_PSRAWI,
14247 IX86_BUILTIN_PSRADI,
14248 IX86_BUILTIN_PSRLWI,
14249 IX86_BUILTIN_PSRLDI,
14250 IX86_BUILTIN_PSRLQI,
14252 IX86_BUILTIN_PUNPCKHBW,
14253 IX86_BUILTIN_PUNPCKHWD,
14254 IX86_BUILTIN_PUNPCKHDQ,
14255 IX86_BUILTIN_PUNPCKLBW,
14256 IX86_BUILTIN_PUNPCKLWD,
14257 IX86_BUILTIN_PUNPCKLDQ,
14259 IX86_BUILTIN_SHUFPS,
14261 IX86_BUILTIN_RCPPS,
14262 IX86_BUILTIN_RCPSS,
14263 IX86_BUILTIN_RSQRTPS,
14264 IX86_BUILTIN_RSQRTSS,
14265 IX86_BUILTIN_SQRTPS,
14266 IX86_BUILTIN_SQRTSS,
14268 IX86_BUILTIN_UNPCKHPS,
14269 IX86_BUILTIN_UNPCKLPS,
14271 IX86_BUILTIN_ANDPS,
14272 IX86_BUILTIN_ANDNPS,
14274 IX86_BUILTIN_XORPS,
14277 IX86_BUILTIN_LDMXCSR,
14278 IX86_BUILTIN_STMXCSR,
14279 IX86_BUILTIN_SFENCE,
14281 /* 3DNow! Original */
14282 IX86_BUILTIN_FEMMS,
14283 IX86_BUILTIN_PAVGUSB,
14284 IX86_BUILTIN_PF2ID,
14285 IX86_BUILTIN_PFACC,
14286 IX86_BUILTIN_PFADD,
14287 IX86_BUILTIN_PFCMPEQ,
14288 IX86_BUILTIN_PFCMPGE,
14289 IX86_BUILTIN_PFCMPGT,
14290 IX86_BUILTIN_PFMAX,
14291 IX86_BUILTIN_PFMIN,
14292 IX86_BUILTIN_PFMUL,
14293 IX86_BUILTIN_PFRCP,
14294 IX86_BUILTIN_PFRCPIT1,
14295 IX86_BUILTIN_PFRCPIT2,
14296 IX86_BUILTIN_PFRSQIT1,
14297 IX86_BUILTIN_PFRSQRT,
14298 IX86_BUILTIN_PFSUB,
14299 IX86_BUILTIN_PFSUBR,
14300 IX86_BUILTIN_PI2FD,
14301 IX86_BUILTIN_PMULHRW,
14303 /* 3DNow! Athlon Extensions */
14304 IX86_BUILTIN_PF2IW,
14305 IX86_BUILTIN_PFNACC,
14306 IX86_BUILTIN_PFPNACC,
14307 IX86_BUILTIN_PI2FW,
14308 IX86_BUILTIN_PSWAPDSI,
14309 IX86_BUILTIN_PSWAPDSF,
14312 IX86_BUILTIN_ADDPD,
14313 IX86_BUILTIN_ADDSD,
14314 IX86_BUILTIN_DIVPD,
14315 IX86_BUILTIN_DIVSD,
14316 IX86_BUILTIN_MULPD,
14317 IX86_BUILTIN_MULSD,
14318 IX86_BUILTIN_SUBPD,
14319 IX86_BUILTIN_SUBSD,
14321 IX86_BUILTIN_CMPEQPD,
14322 IX86_BUILTIN_CMPLTPD,
14323 IX86_BUILTIN_CMPLEPD,
14324 IX86_BUILTIN_CMPGTPD,
14325 IX86_BUILTIN_CMPGEPD,
14326 IX86_BUILTIN_CMPNEQPD,
14327 IX86_BUILTIN_CMPNLTPD,
14328 IX86_BUILTIN_CMPNLEPD,
14329 IX86_BUILTIN_CMPNGTPD,
14330 IX86_BUILTIN_CMPNGEPD,
14331 IX86_BUILTIN_CMPORDPD,
14332 IX86_BUILTIN_CMPUNORDPD,
14333 IX86_BUILTIN_CMPNEPD,
14334 IX86_BUILTIN_CMPEQSD,
14335 IX86_BUILTIN_CMPLTSD,
14336 IX86_BUILTIN_CMPLESD,
14337 IX86_BUILTIN_CMPNEQSD,
14338 IX86_BUILTIN_CMPNLTSD,
14339 IX86_BUILTIN_CMPNLESD,
14340 IX86_BUILTIN_CMPORDSD,
14341 IX86_BUILTIN_CMPUNORDSD,
14342 IX86_BUILTIN_CMPNESD,
14344 IX86_BUILTIN_COMIEQSD,
14345 IX86_BUILTIN_COMILTSD,
14346 IX86_BUILTIN_COMILESD,
14347 IX86_BUILTIN_COMIGTSD,
14348 IX86_BUILTIN_COMIGESD,
14349 IX86_BUILTIN_COMINEQSD,
14350 IX86_BUILTIN_UCOMIEQSD,
14351 IX86_BUILTIN_UCOMILTSD,
14352 IX86_BUILTIN_UCOMILESD,
14353 IX86_BUILTIN_UCOMIGTSD,
14354 IX86_BUILTIN_UCOMIGESD,
14355 IX86_BUILTIN_UCOMINEQSD,
14357 IX86_BUILTIN_MAXPD,
14358 IX86_BUILTIN_MAXSD,
14359 IX86_BUILTIN_MINPD,
14360 IX86_BUILTIN_MINSD,
14362 IX86_BUILTIN_ANDPD,
14363 IX86_BUILTIN_ANDNPD,
14365 IX86_BUILTIN_XORPD,
14367 IX86_BUILTIN_SQRTPD,
14368 IX86_BUILTIN_SQRTSD,
14370 IX86_BUILTIN_UNPCKHPD,
14371 IX86_BUILTIN_UNPCKLPD,
14373 IX86_BUILTIN_SHUFPD,
14375 IX86_BUILTIN_LOADUPD,
14376 IX86_BUILTIN_STOREUPD,
14377 IX86_BUILTIN_MOVSD,
14379 IX86_BUILTIN_LOADHPD,
14380 IX86_BUILTIN_LOADLPD,
14382 IX86_BUILTIN_CVTDQ2PD,
14383 IX86_BUILTIN_CVTDQ2PS,
14385 IX86_BUILTIN_CVTPD2DQ,
14386 IX86_BUILTIN_CVTPD2PI,
14387 IX86_BUILTIN_CVTPD2PS,
14388 IX86_BUILTIN_CVTTPD2DQ,
14389 IX86_BUILTIN_CVTTPD2PI,
14391 IX86_BUILTIN_CVTPI2PD,
14392 IX86_BUILTIN_CVTSI2SD,
14393 IX86_BUILTIN_CVTSI642SD,
14395 IX86_BUILTIN_CVTSD2SI,
14396 IX86_BUILTIN_CVTSD2SI64,
14397 IX86_BUILTIN_CVTSD2SS,
14398 IX86_BUILTIN_CVTSS2SD,
14399 IX86_BUILTIN_CVTTSD2SI,
14400 IX86_BUILTIN_CVTTSD2SI64,
14402 IX86_BUILTIN_CVTPS2DQ,
14403 IX86_BUILTIN_CVTPS2PD,
14404 IX86_BUILTIN_CVTTPS2DQ,
14406 IX86_BUILTIN_MOVNTI,
14407 IX86_BUILTIN_MOVNTPD,
14408 IX86_BUILTIN_MOVNTDQ,
14411 IX86_BUILTIN_MASKMOVDQU,
14412 IX86_BUILTIN_MOVMSKPD,
14413 IX86_BUILTIN_PMOVMSKB128,
14415 IX86_BUILTIN_PACKSSWB128,
14416 IX86_BUILTIN_PACKSSDW128,
14417 IX86_BUILTIN_PACKUSWB128,
14419 IX86_BUILTIN_PADDB128,
14420 IX86_BUILTIN_PADDW128,
14421 IX86_BUILTIN_PADDD128,
14422 IX86_BUILTIN_PADDQ128,
14423 IX86_BUILTIN_PADDSB128,
14424 IX86_BUILTIN_PADDSW128,
14425 IX86_BUILTIN_PADDUSB128,
14426 IX86_BUILTIN_PADDUSW128,
14427 IX86_BUILTIN_PSUBB128,
14428 IX86_BUILTIN_PSUBW128,
14429 IX86_BUILTIN_PSUBD128,
14430 IX86_BUILTIN_PSUBQ128,
14431 IX86_BUILTIN_PSUBSB128,
14432 IX86_BUILTIN_PSUBSW128,
14433 IX86_BUILTIN_PSUBUSB128,
14434 IX86_BUILTIN_PSUBUSW128,
14436 IX86_BUILTIN_PAND128,
14437 IX86_BUILTIN_PANDN128,
14438 IX86_BUILTIN_POR128,
14439 IX86_BUILTIN_PXOR128,
14441 IX86_BUILTIN_PAVGB128,
14442 IX86_BUILTIN_PAVGW128,
14444 IX86_BUILTIN_PCMPEQB128,
14445 IX86_BUILTIN_PCMPEQW128,
14446 IX86_BUILTIN_PCMPEQD128,
14447 IX86_BUILTIN_PCMPGTB128,
14448 IX86_BUILTIN_PCMPGTW128,
14449 IX86_BUILTIN_PCMPGTD128,
14451 IX86_BUILTIN_PMADDWD128,
14453 IX86_BUILTIN_PMAXSW128,
14454 IX86_BUILTIN_PMAXUB128,
14455 IX86_BUILTIN_PMINSW128,
14456 IX86_BUILTIN_PMINUB128,
14458 IX86_BUILTIN_PMULUDQ,
14459 IX86_BUILTIN_PMULUDQ128,
14460 IX86_BUILTIN_PMULHUW128,
14461 IX86_BUILTIN_PMULHW128,
14462 IX86_BUILTIN_PMULLW128,
14464 IX86_BUILTIN_PSADBW128,
14465 IX86_BUILTIN_PSHUFHW,
14466 IX86_BUILTIN_PSHUFLW,
14467 IX86_BUILTIN_PSHUFD,
14469 IX86_BUILTIN_PSLLW128,
14470 IX86_BUILTIN_PSLLD128,
14471 IX86_BUILTIN_PSLLQ128,
14472 IX86_BUILTIN_PSRAW128,
14473 IX86_BUILTIN_PSRAD128,
14474 IX86_BUILTIN_PSRLW128,
14475 IX86_BUILTIN_PSRLD128,
14476 IX86_BUILTIN_PSRLQ128,
14477 IX86_BUILTIN_PSLLDQI128,
14478 IX86_BUILTIN_PSLLWI128,
14479 IX86_BUILTIN_PSLLDI128,
14480 IX86_BUILTIN_PSLLQI128,
14481 IX86_BUILTIN_PSRAWI128,
14482 IX86_BUILTIN_PSRADI128,
14483 IX86_BUILTIN_PSRLDQI128,
14484 IX86_BUILTIN_PSRLWI128,
14485 IX86_BUILTIN_PSRLDI128,
14486 IX86_BUILTIN_PSRLQI128,
14488 IX86_BUILTIN_PUNPCKHBW128,
14489 IX86_BUILTIN_PUNPCKHWD128,
14490 IX86_BUILTIN_PUNPCKHDQ128,
14491 IX86_BUILTIN_PUNPCKHQDQ128,
14492 IX86_BUILTIN_PUNPCKLBW128,
14493 IX86_BUILTIN_PUNPCKLWD128,
14494 IX86_BUILTIN_PUNPCKLDQ128,
14495 IX86_BUILTIN_PUNPCKLQDQ128,
14497 IX86_BUILTIN_CLFLUSH,
14498 IX86_BUILTIN_MFENCE,
14499 IX86_BUILTIN_LFENCE,
14501 /* Prescott New Instructions. */
14502 IX86_BUILTIN_ADDSUBPS,
14503 IX86_BUILTIN_HADDPS,
14504 IX86_BUILTIN_HSUBPS,
14505 IX86_BUILTIN_MOVSHDUP,
14506 IX86_BUILTIN_MOVSLDUP,
14507 IX86_BUILTIN_ADDSUBPD,
14508 IX86_BUILTIN_HADDPD,
14509 IX86_BUILTIN_HSUBPD,
14510 IX86_BUILTIN_LDDQU,
14512 IX86_BUILTIN_MONITOR,
14513 IX86_BUILTIN_MWAIT,
14515 IX86_BUILTIN_VEC_INIT_V2SI,
14516 IX86_BUILTIN_VEC_INIT_V4HI,
14517 IX86_BUILTIN_VEC_INIT_V8QI,
14518 IX86_BUILTIN_VEC_EXT_V2DF,
14519 IX86_BUILTIN_VEC_EXT_V2DI,
14520 IX86_BUILTIN_VEC_EXT_V4SF,
14521 IX86_BUILTIN_VEC_EXT_V4SI,
14522 IX86_BUILTIN_VEC_EXT_V8HI,
14523 IX86_BUILTIN_VEC_EXT_V2SI,
14524 IX86_BUILTIN_VEC_EXT_V4HI,
14525 IX86_BUILTIN_VEC_SET_V8HI,
14526 IX86_BUILTIN_VEC_SET_V4HI,
14531 #define def_builtin(MASK, NAME, TYPE, CODE) \
14533 if ((MASK) & target_flags \
14534 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14535 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14536 NULL, NULL_TREE); \
14539 /* Bits for builtin_description.flag. */
14541 /* Set when we don't support the comparison natively, and should
14542 swap_comparison in order to support it. */
14543 #define BUILTIN_DESC_SWAP_OPERANDS 1
14545 struct builtin_description
14547 const unsigned int mask;
14548 const enum insn_code icode;
14549 const char *const name;
14550 const enum ix86_builtins code;
14551 const enum rtx_code comparison;
14552 const unsigned int flag;
14555 static const struct builtin_description bdesc_comi[] =
14557 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14558 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14559 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14560 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14561 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14562 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14563 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14564 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14565 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14566 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14567 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14568 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14569 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14570 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14571 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14572 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14573 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14574 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14575 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14576 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14577 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14578 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14579 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14580 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14583 static const struct builtin_description bdesc_2arg[] =
14586 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14587 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14588 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14589 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14590 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14591 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14592 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14593 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14595 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14596 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14597 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14598 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14599 BUILTIN_DESC_SWAP_OPERANDS },
14600 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14601 BUILTIN_DESC_SWAP_OPERANDS },
14602 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14603 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14604 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14605 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14606 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14607 BUILTIN_DESC_SWAP_OPERANDS },
14608 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14609 BUILTIN_DESC_SWAP_OPERANDS },
14610 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14611 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14612 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14613 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14614 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14615 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14616 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14617 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14618 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14619 BUILTIN_DESC_SWAP_OPERANDS },
14620 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14621 BUILTIN_DESC_SWAP_OPERANDS },
14622 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14624 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14625 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14626 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14627 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14629 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14630 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14631 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14632 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14634 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14635 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14636 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14637 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14638 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14641 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14642 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14643 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14644 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14645 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14646 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14647 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14648 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14650 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14651 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14652 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14653 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14654 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14655 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14656 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14657 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14659 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14660 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14661 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14663 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14664 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14665 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14666 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14668 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14669 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14671 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14672 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14673 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14674 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14675 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14676 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14678 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14679 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14680 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14681 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14683 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14684 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14685 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14686 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14687 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14688 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14691 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14692 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14693 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14695 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14696 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14697 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14699 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14700 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14701 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14702 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14703 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14704 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14706 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14707 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14708 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14709 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14710 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14711 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14713 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14714 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14715 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14716 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14718 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14719 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14722 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14723 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14724 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14725 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14726 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14727 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14728 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14729 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14731 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14732 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14733 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14734 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14735 BUILTIN_DESC_SWAP_OPERANDS },
14736 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14737 BUILTIN_DESC_SWAP_OPERANDS },
14738 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14739 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14740 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14741 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14742 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14743 BUILTIN_DESC_SWAP_OPERANDS },
14744 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14745 BUILTIN_DESC_SWAP_OPERANDS },
14746 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14747 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14748 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14749 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14750 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14751 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14752 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14753 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14754 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14756 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14757 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14758 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14759 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14761 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14762 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14763 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14764 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14766 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14767 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14768 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14771 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14772 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14773 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14774 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14775 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14776 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14777 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14778 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14780 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14781 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14782 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14783 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14784 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14785 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14786 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14787 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14789 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14790 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14792 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14793 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14794 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14795 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14797 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14798 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14800 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14801 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14802 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14805 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14807 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14808 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14809 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14810 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14812 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14813 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14814 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14815 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14816 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14817 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14818 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14819 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14821 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14822 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14823 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14825 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14826 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14828 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14829 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14831 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14832 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14833 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14835 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14836 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14837 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14839 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14840 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14842 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14844 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14845 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14846 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14847 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14850 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14851 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14852 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14853 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14854 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14855 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14858 static const struct builtin_description bdesc_1arg[] =
14860 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14861 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14863 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14864 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14865 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14867 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14868 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14869 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14870 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14871 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14872 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14874 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14875 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14877 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14879 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14880 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14882 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14883 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14884 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14885 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14886 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14888 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14890 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14891 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14892 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14893 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14895 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14896 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14897 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14900 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14901 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14905 ix86_init_builtins (void)
14908 ix86_init_mmx_sse_builtins ();
14911 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14912 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14915 ix86_init_mmx_sse_builtins (void)
14917 const struct builtin_description * d;
14920 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14921 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14922 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14923 tree V2DI_type_node
14924 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14925 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14926 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14927 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14928 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14929 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14930 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14932 tree pchar_type_node = build_pointer_type (char_type_node);
14933 tree pcchar_type_node = build_pointer_type (
14934 build_type_variant (char_type_node, 1, 0));
14935 tree pfloat_type_node = build_pointer_type (float_type_node);
14936 tree pcfloat_type_node = build_pointer_type (
14937 build_type_variant (float_type_node, 1, 0));
14938 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14939 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14940 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14943 tree int_ftype_v4sf_v4sf
14944 = build_function_type_list (integer_type_node,
14945 V4SF_type_node, V4SF_type_node, NULL_TREE);
14946 tree v4si_ftype_v4sf_v4sf
14947 = build_function_type_list (V4SI_type_node,
14948 V4SF_type_node, V4SF_type_node, NULL_TREE);
14949 /* MMX/SSE/integer conversions. */
14950 tree int_ftype_v4sf
14951 = build_function_type_list (integer_type_node,
14952 V4SF_type_node, NULL_TREE);
14953 tree int64_ftype_v4sf
14954 = build_function_type_list (long_long_integer_type_node,
14955 V4SF_type_node, NULL_TREE);
14956 tree int_ftype_v8qi
14957 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14958 tree v4sf_ftype_v4sf_int
14959 = build_function_type_list (V4SF_type_node,
14960 V4SF_type_node, integer_type_node, NULL_TREE);
14961 tree v4sf_ftype_v4sf_int64
14962 = build_function_type_list (V4SF_type_node,
14963 V4SF_type_node, long_long_integer_type_node,
14965 tree v4sf_ftype_v4sf_v2si
14966 = build_function_type_list (V4SF_type_node,
14967 V4SF_type_node, V2SI_type_node, NULL_TREE);
14969 /* Miscellaneous. */
14970 tree v8qi_ftype_v4hi_v4hi
14971 = build_function_type_list (V8QI_type_node,
14972 V4HI_type_node, V4HI_type_node, NULL_TREE);
14973 tree v4hi_ftype_v2si_v2si
14974 = build_function_type_list (V4HI_type_node,
14975 V2SI_type_node, V2SI_type_node, NULL_TREE);
14976 tree v4sf_ftype_v4sf_v4sf_int
14977 = build_function_type_list (V4SF_type_node,
14978 V4SF_type_node, V4SF_type_node,
14979 integer_type_node, NULL_TREE);
14980 tree v2si_ftype_v4hi_v4hi
14981 = build_function_type_list (V2SI_type_node,
14982 V4HI_type_node, V4HI_type_node, NULL_TREE);
14983 tree v4hi_ftype_v4hi_int
14984 = build_function_type_list (V4HI_type_node,
14985 V4HI_type_node, integer_type_node, NULL_TREE);
14986 tree v4hi_ftype_v4hi_di
14987 = build_function_type_list (V4HI_type_node,
14988 V4HI_type_node, long_long_unsigned_type_node,
14990 tree v2si_ftype_v2si_di
14991 = build_function_type_list (V2SI_type_node,
14992 V2SI_type_node, long_long_unsigned_type_node,
14994 tree void_ftype_void
14995 = build_function_type (void_type_node, void_list_node);
14996 tree void_ftype_unsigned
14997 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14998 tree void_ftype_unsigned_unsigned
14999 = build_function_type_list (void_type_node, unsigned_type_node,
15000 unsigned_type_node, NULL_TREE);
15001 tree void_ftype_pcvoid_unsigned_unsigned
15002 = build_function_type_list (void_type_node, const_ptr_type_node,
15003 unsigned_type_node, unsigned_type_node,
15005 tree unsigned_ftype_void
15006 = build_function_type (unsigned_type_node, void_list_node);
15007 tree v2si_ftype_v4sf
15008 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15009 /* Loads/stores. */
15010 tree void_ftype_v8qi_v8qi_pchar
15011 = build_function_type_list (void_type_node,
15012 V8QI_type_node, V8QI_type_node,
15013 pchar_type_node, NULL_TREE);
15014 tree v4sf_ftype_pcfloat
15015 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15016 /* @@@ the type is bogus */
15017 tree v4sf_ftype_v4sf_pv2si
15018 = build_function_type_list (V4SF_type_node,
15019 V4SF_type_node, pv2si_type_node, NULL_TREE);
15020 tree void_ftype_pv2si_v4sf
15021 = build_function_type_list (void_type_node,
15022 pv2si_type_node, V4SF_type_node, NULL_TREE);
15023 tree void_ftype_pfloat_v4sf
15024 = build_function_type_list (void_type_node,
15025 pfloat_type_node, V4SF_type_node, NULL_TREE);
15026 tree void_ftype_pdi_di
15027 = build_function_type_list (void_type_node,
15028 pdi_type_node, long_long_unsigned_type_node,
15030 tree void_ftype_pv2di_v2di
15031 = build_function_type_list (void_type_node,
15032 pv2di_type_node, V2DI_type_node, NULL_TREE);
15033 /* Normal vector unops. */
15034 tree v4sf_ftype_v4sf
15035 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15037 /* Normal vector binops. */
15038 tree v4sf_ftype_v4sf_v4sf
15039 = build_function_type_list (V4SF_type_node,
15040 V4SF_type_node, V4SF_type_node, NULL_TREE);
15041 tree v8qi_ftype_v8qi_v8qi
15042 = build_function_type_list (V8QI_type_node,
15043 V8QI_type_node, V8QI_type_node, NULL_TREE);
15044 tree v4hi_ftype_v4hi_v4hi
15045 = build_function_type_list (V4HI_type_node,
15046 V4HI_type_node, V4HI_type_node, NULL_TREE);
15047 tree v2si_ftype_v2si_v2si
15048 = build_function_type_list (V2SI_type_node,
15049 V2SI_type_node, V2SI_type_node, NULL_TREE);
15050 tree di_ftype_di_di
15051 = build_function_type_list (long_long_unsigned_type_node,
15052 long_long_unsigned_type_node,
15053 long_long_unsigned_type_node, NULL_TREE);
15055 tree v2si_ftype_v2sf
15056 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15057 tree v2sf_ftype_v2si
15058 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15059 tree v2si_ftype_v2si
15060 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15061 tree v2sf_ftype_v2sf
15062 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15063 tree v2sf_ftype_v2sf_v2sf
15064 = build_function_type_list (V2SF_type_node,
15065 V2SF_type_node, V2SF_type_node, NULL_TREE);
15066 tree v2si_ftype_v2sf_v2sf
15067 = build_function_type_list (V2SI_type_node,
15068 V2SF_type_node, V2SF_type_node, NULL_TREE);
15069 tree pint_type_node = build_pointer_type (integer_type_node);
15070 tree pdouble_type_node = build_pointer_type (double_type_node);
15071 tree pcdouble_type_node = build_pointer_type (
15072 build_type_variant (double_type_node, 1, 0));
15073 tree int_ftype_v2df_v2df
15074 = build_function_type_list (integer_type_node,
15075 V2DF_type_node, V2DF_type_node, NULL_TREE);
15077 tree void_ftype_pcvoid
15078 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15079 tree v4sf_ftype_v4si
15080 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15081 tree v4si_ftype_v4sf
15082 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15083 tree v2df_ftype_v4si
15084 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15085 tree v4si_ftype_v2df
15086 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15087 tree v2si_ftype_v2df
15088 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15089 tree v4sf_ftype_v2df
15090 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15091 tree v2df_ftype_v2si
15092 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15093 tree v2df_ftype_v4sf
15094 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15095 tree int_ftype_v2df
15096 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15097 tree int64_ftype_v2df
15098 = build_function_type_list (long_long_integer_type_node,
15099 V2DF_type_node, NULL_TREE);
15100 tree v2df_ftype_v2df_int
15101 = build_function_type_list (V2DF_type_node,
15102 V2DF_type_node, integer_type_node, NULL_TREE);
15103 tree v2df_ftype_v2df_int64
15104 = build_function_type_list (V2DF_type_node,
15105 V2DF_type_node, long_long_integer_type_node,
15107 tree v4sf_ftype_v4sf_v2df
15108 = build_function_type_list (V4SF_type_node,
15109 V4SF_type_node, V2DF_type_node, NULL_TREE);
15110 tree v2df_ftype_v2df_v4sf
15111 = build_function_type_list (V2DF_type_node,
15112 V2DF_type_node, V4SF_type_node, NULL_TREE);
15113 tree v2df_ftype_v2df_v2df_int
15114 = build_function_type_list (V2DF_type_node,
15115 V2DF_type_node, V2DF_type_node,
15118 tree v2df_ftype_v2df_pcdouble
15119 = build_function_type_list (V2DF_type_node,
15120 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15121 tree void_ftype_pdouble_v2df
15122 = build_function_type_list (void_type_node,
15123 pdouble_type_node, V2DF_type_node, NULL_TREE);
15124 tree void_ftype_pint_int
15125 = build_function_type_list (void_type_node,
15126 pint_type_node, integer_type_node, NULL_TREE);
15127 tree void_ftype_v16qi_v16qi_pchar
15128 = build_function_type_list (void_type_node,
15129 V16QI_type_node, V16QI_type_node,
15130 pchar_type_node, NULL_TREE);
15131 tree v2df_ftype_pcdouble
15132 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15133 tree v2df_ftype_v2df_v2df
15134 = build_function_type_list (V2DF_type_node,
15135 V2DF_type_node, V2DF_type_node, NULL_TREE);
15136 tree v16qi_ftype_v16qi_v16qi
15137 = build_function_type_list (V16QI_type_node,
15138 V16QI_type_node, V16QI_type_node, NULL_TREE);
15139 tree v8hi_ftype_v8hi_v8hi
15140 = build_function_type_list (V8HI_type_node,
15141 V8HI_type_node, V8HI_type_node, NULL_TREE);
15142 tree v4si_ftype_v4si_v4si
15143 = build_function_type_list (V4SI_type_node,
15144 V4SI_type_node, V4SI_type_node, NULL_TREE);
15145 tree v2di_ftype_v2di_v2di
15146 = build_function_type_list (V2DI_type_node,
15147 V2DI_type_node, V2DI_type_node, NULL_TREE);
15148 tree v2di_ftype_v2df_v2df
15149 = build_function_type_list (V2DI_type_node,
15150 V2DF_type_node, V2DF_type_node, NULL_TREE);
15151 tree v2df_ftype_v2df
15152 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15153 tree v2di_ftype_v2di_int
15154 = build_function_type_list (V2DI_type_node,
15155 V2DI_type_node, integer_type_node, NULL_TREE);
15156 tree v4si_ftype_v4si_int
15157 = build_function_type_list (V4SI_type_node,
15158 V4SI_type_node, integer_type_node, NULL_TREE);
15159 tree v8hi_ftype_v8hi_int
15160 = build_function_type_list (V8HI_type_node,
15161 V8HI_type_node, integer_type_node, NULL_TREE);
15162 tree v8hi_ftype_v8hi_v2di
15163 = build_function_type_list (V8HI_type_node,
15164 V8HI_type_node, V2DI_type_node, NULL_TREE);
15165 tree v4si_ftype_v4si_v2di
15166 = build_function_type_list (V4SI_type_node,
15167 V4SI_type_node, V2DI_type_node, NULL_TREE);
15168 tree v4si_ftype_v8hi_v8hi
15169 = build_function_type_list (V4SI_type_node,
15170 V8HI_type_node, V8HI_type_node, NULL_TREE);
15171 tree di_ftype_v8qi_v8qi
15172 = build_function_type_list (long_long_unsigned_type_node,
15173 V8QI_type_node, V8QI_type_node, NULL_TREE);
15174 tree di_ftype_v2si_v2si
15175 = build_function_type_list (long_long_unsigned_type_node,
15176 V2SI_type_node, V2SI_type_node, NULL_TREE);
15177 tree v2di_ftype_v16qi_v16qi
15178 = build_function_type_list (V2DI_type_node,
15179 V16QI_type_node, V16QI_type_node, NULL_TREE);
15180 tree v2di_ftype_v4si_v4si
15181 = build_function_type_list (V2DI_type_node,
15182 V4SI_type_node, V4SI_type_node, NULL_TREE);
15183 tree int_ftype_v16qi
15184 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15185 tree v16qi_ftype_pcchar
15186 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15187 tree void_ftype_pchar_v16qi
15188 = build_function_type_list (void_type_node,
15189 pchar_type_node, V16QI_type_node, NULL_TREE);
15192 tree float128_type;
15195 /* The __float80 type. */
15196 if (TYPE_MODE (long_double_type_node) == XFmode)
15197 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15201 /* The __float80 type. */
15202 float80_type = make_node (REAL_TYPE);
15203 TYPE_PRECISION (float80_type) = 80;
15204 layout_type (float80_type);
15205 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15210 float128_type = make_node (REAL_TYPE);
15211 TYPE_PRECISION (float128_type) = 128;
15212 layout_type (float128_type);
15213 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15216 /* Add all builtins that are more or less simple operations on two
15218 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15220 /* Use one of the operands; the target can have a different mode for
15221 mask-generating compares. */
15222 enum machine_mode mode;
15227 mode = insn_data[d->icode].operand[1].mode;
15232 type = v16qi_ftype_v16qi_v16qi;
15235 type = v8hi_ftype_v8hi_v8hi;
15238 type = v4si_ftype_v4si_v4si;
15241 type = v2di_ftype_v2di_v2di;
15244 type = v2df_ftype_v2df_v2df;
15247 type = v4sf_ftype_v4sf_v4sf;
15250 type = v8qi_ftype_v8qi_v8qi;
15253 type = v4hi_ftype_v4hi_v4hi;
15256 type = v2si_ftype_v2si_v2si;
15259 type = di_ftype_di_di;
15263 gcc_unreachable ();
15266 /* Override for comparisons. */
15267 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15268 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15269 type = v4si_ftype_v4sf_v4sf;
15271 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15272 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15273 type = v2di_ftype_v2df_v2df;
15275 def_builtin (d->mask, d->name, type, d->code);
15278 /* Add the remaining MMX insns with somewhat more complicated types. */
15279 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15280 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15281 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15282 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15284 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15285 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15286 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15288 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15289 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15291 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15292 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15294 /* comi/ucomi insns. */
15295 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15296 if (d->mask == MASK_SSE2)
15297 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15299 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15301 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15302 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15303 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15305 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15306 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15307 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15308 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15309 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15310 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15311 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15312 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15313 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15314 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15315 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15317 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15319 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15320 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15322 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15323 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15324 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15325 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15327 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15328 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15329 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15330 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15332 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15334 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15336 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15337 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15338 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15339 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15340 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15341 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15343 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15345 /* Original 3DNow! */
15346 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15347 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15348 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15349 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15350 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15351 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15352 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15353 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15354 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15355 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15356 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15357 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15358 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15359 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15360 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15361 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15362 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15363 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15364 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15365 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15367 /* 3DNow! extension as used in the Athlon CPU. */
15368 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15369 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15370 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15371 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15372 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15373 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15376 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15378 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15379 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15381 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15382 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15384 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15385 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15386 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15387 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15388 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15390 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15391 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15392 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15393 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15395 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15396 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15398 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15400 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15401 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15403 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15404 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15405 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15406 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15407 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15409 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15411 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15412 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15413 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15414 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15416 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15417 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15418 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15420 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15421 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15422 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15423 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15425 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15426 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15427 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15429 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15430 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15432 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15433 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15435 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15436 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15437 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15439 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15440 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15441 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15443 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15444 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15446 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15447 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15448 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15449 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15451 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15452 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15453 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15454 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15456 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15457 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15459 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15461 /* Prescott New Instructions. */
15462 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15463 void_ftype_pcvoid_unsigned_unsigned,
15464 IX86_BUILTIN_MONITOR);
15465 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15466 void_ftype_unsigned_unsigned,
15467 IX86_BUILTIN_MWAIT);
15468 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15470 IX86_BUILTIN_MOVSHDUP);
15471 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15473 IX86_BUILTIN_MOVSLDUP);
15474 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15475 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15477 /* Access to the vec_init patterns. */
15478 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15479 integer_type_node, NULL_TREE);
15480 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15481 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15483 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15484 short_integer_type_node,
15485 short_integer_type_node,
15486 short_integer_type_node, NULL_TREE);
15487 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15488 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15490 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15491 char_type_node, char_type_node,
15492 char_type_node, char_type_node,
15493 char_type_node, char_type_node,
15494 char_type_node, NULL_TREE);
15495 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15496 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15498 /* Access to the vec_extract patterns. */
15499 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15500 integer_type_node, NULL_TREE);
15501 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15502 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15504 ftype = build_function_type_list (long_long_integer_type_node,
15505 V2DI_type_node, integer_type_node,
15507 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15508 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15510 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15511 integer_type_node, NULL_TREE);
15512 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15513 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15515 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15516 integer_type_node, NULL_TREE);
15517 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15518 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15520 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15521 integer_type_node, NULL_TREE);
15522 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15523 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15525 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15526 integer_type_node, NULL_TREE);
15527 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15528 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15530 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15531 integer_type_node, NULL_TREE);
15532 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15533 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15535 /* Access to the vec_set patterns. */
15536 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15538 integer_type_node, NULL_TREE);
15539 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15540 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15542 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15544 integer_type_node, NULL_TREE);
15545 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15546 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15549 /* Errors in the source file can cause expand_expr to return const0_rtx
15550 where we expect a vector. To avoid crashing, use one of the vector
15551 clear instructions. */
15553 safe_vector_operand (rtx x, enum machine_mode mode)
15555 if (x == const0_rtx)
15556 x = CONST0_RTX (mode);
15560 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15563 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15566 tree arg0 = TREE_VALUE (arglist);
15567 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15568 rtx op0 = expand_normal (arg0);
15569 rtx op1 = expand_normal (arg1);
15570 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15571 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15572 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15574 if (VECTOR_MODE_P (mode0))
15575 op0 = safe_vector_operand (op0, mode0);
15576 if (VECTOR_MODE_P (mode1))
15577 op1 = safe_vector_operand (op1, mode1);
15579 if (optimize || !target
15580 || GET_MODE (target) != tmode
15581 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15582 target = gen_reg_rtx (tmode);
15584 if (GET_MODE (op1) == SImode && mode1 == TImode)
15586 rtx x = gen_reg_rtx (V4SImode);
15587 emit_insn (gen_sse2_loadd (x, op1));
15588 op1 = gen_lowpart (TImode, x);
15591 /* The insn must want input operands in the same modes as the
15593 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15594 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15596 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15597 op0 = copy_to_mode_reg (mode0, op0);
15598 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15599 op1 = copy_to_mode_reg (mode1, op1);
15601 /* ??? Using ix86_fixup_binary_operands is problematic when
15602 we've got mismatched modes. Fake it. */
15608 if (tmode == mode0 && tmode == mode1)
15610 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15614 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15616 op0 = force_reg (mode0, op0);
15617 op1 = force_reg (mode1, op1);
15618 target = gen_reg_rtx (tmode);
15621 pat = GEN_FCN (icode) (target, op0, op1);
15628 /* Subroutine of ix86_expand_builtin to take care of stores. */
15631 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15634 tree arg0 = TREE_VALUE (arglist);
15635 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15636 rtx op0 = expand_normal (arg0);
15637 rtx op1 = expand_normal (arg1);
15638 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15639 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15641 if (VECTOR_MODE_P (mode1))
15642 op1 = safe_vector_operand (op1, mode1);
15644 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15645 op1 = copy_to_mode_reg (mode1, op1);
15647 pat = GEN_FCN (icode) (op0, op1);
15653 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15656 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15657 rtx target, int do_load)
15660 tree arg0 = TREE_VALUE (arglist);
15661 rtx op0 = expand_normal (arg0);
15662 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15663 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15665 if (optimize || !target
15666 || GET_MODE (target) != tmode
15667 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15668 target = gen_reg_rtx (tmode);
15670 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15673 if (VECTOR_MODE_P (mode0))
15674 op0 = safe_vector_operand (op0, mode0);
15676 if ((optimize && !register_operand (op0, mode0))
15677 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15678 op0 = copy_to_mode_reg (mode0, op0);
15681 pat = GEN_FCN (icode) (target, op0);
15688 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15689 sqrtss, rsqrtss, rcpss. */
15692 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15695 tree arg0 = TREE_VALUE (arglist);
15696 rtx op1, op0 = expand_normal (arg0);
15697 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15698 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15700 if (optimize || !target
15701 || GET_MODE (target) != tmode
15702 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15703 target = gen_reg_rtx (tmode);
15705 if (VECTOR_MODE_P (mode0))
15706 op0 = safe_vector_operand (op0, mode0);
15708 if ((optimize && !register_operand (op0, mode0))
15709 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15710 op0 = copy_to_mode_reg (mode0, op0);
15713 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15714 op1 = copy_to_mode_reg (mode0, op1);
15716 pat = GEN_FCN (icode) (target, op0, op1);
15723 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15726 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15730 tree arg0 = TREE_VALUE (arglist);
15731 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15732 rtx op0 = expand_normal (arg0);
15733 rtx op1 = expand_normal (arg1);
15735 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15736 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15737 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15738 enum rtx_code comparison = d->comparison;
15740 if (VECTOR_MODE_P (mode0))
15741 op0 = safe_vector_operand (op0, mode0);
15742 if (VECTOR_MODE_P (mode1))
15743 op1 = safe_vector_operand (op1, mode1);
15745 /* Swap operands if we have a comparison that isn't available in
15747 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15749 rtx tmp = gen_reg_rtx (mode1);
15750 emit_move_insn (tmp, op1);
15755 if (optimize || !target
15756 || GET_MODE (target) != tmode
15757 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15758 target = gen_reg_rtx (tmode);
15760 if ((optimize && !register_operand (op0, mode0))
15761 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15762 op0 = copy_to_mode_reg (mode0, op0);
15763 if ((optimize && !register_operand (op1, mode1))
15764 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15765 op1 = copy_to_mode_reg (mode1, op1);
15767 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15768 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15775 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15778 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15782 tree arg0 = TREE_VALUE (arglist);
15783 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15784 rtx op0 = expand_normal (arg0);
15785 rtx op1 = expand_normal (arg1);
15787 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15788 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15789 enum rtx_code comparison = d->comparison;
15791 if (VECTOR_MODE_P (mode0))
15792 op0 = safe_vector_operand (op0, mode0);
15793 if (VECTOR_MODE_P (mode1))
15794 op1 = safe_vector_operand (op1, mode1);
15796 /* Swap operands if we have a comparison that isn't available in
15798 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15805 target = gen_reg_rtx (SImode);
15806 emit_move_insn (target, const0_rtx);
15807 target = gen_rtx_SUBREG (QImode, target, 0);
15809 if ((optimize && !register_operand (op0, mode0))
15810 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15811 op0 = copy_to_mode_reg (mode0, op0);
15812 if ((optimize && !register_operand (op1, mode1))
15813 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15814 op1 = copy_to_mode_reg (mode1, op1);
15816 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15817 pat = GEN_FCN (d->icode) (op0, op1);
15821 emit_insn (gen_rtx_SET (VOIDmode,
15822 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15823 gen_rtx_fmt_ee (comparison, QImode,
15827 return SUBREG_REG (target);
15830 /* Return the integer constant in ARG. Constrain it to be in the range
15831 of the subparts of VEC_TYPE; issue an error if not. */
15834 get_element_number (tree vec_type, tree arg)
15836 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15838 if (!host_integerp (arg, 1)
15839 || (elt = tree_low_cst (arg, 1), elt > max))
15841 error ("selector must be an integer constant in the range 0..%wi", max);
15848 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15849 ix86_expand_vector_init. We DO have language-level syntax for this, in
15850 the form of (type){ init-list }. Except that since we can't place emms
15851 instructions from inside the compiler, we can't allow the use of MMX
15852 registers unless the user explicitly asks for it. So we do *not* define
15853 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15854 we have builtins invoked by mmintrin.h that gives us license to emit
15855 these sorts of instructions. */
15858 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15860 enum machine_mode tmode = TYPE_MODE (type);
15861 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15862 int i, n_elt = GET_MODE_NUNITS (tmode);
15863 rtvec v = rtvec_alloc (n_elt);
15865 gcc_assert (VECTOR_MODE_P (tmode));
15867 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15869 rtx x = expand_normal (TREE_VALUE (arglist));
15870 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15873 gcc_assert (arglist == NULL);
15875 if (!target || !register_operand (target, tmode))
15876 target = gen_reg_rtx (tmode);
15878 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15882 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15883 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15884 had a language-level syntax for referencing vector elements. */
15887 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15889 enum machine_mode tmode, mode0;
15894 arg0 = TREE_VALUE (arglist);
15895 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15897 op0 = expand_normal (arg0);
15898 elt = get_element_number (TREE_TYPE (arg0), arg1);
15900 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15901 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15902 gcc_assert (VECTOR_MODE_P (mode0));
15904 op0 = force_reg (mode0, op0);
15906 if (optimize || !target || !register_operand (target, tmode))
15907 target = gen_reg_rtx (tmode);
15909 ix86_expand_vector_extract (true, target, op0, elt);
15914 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15915 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15916 a language-level syntax for referencing vector elements. */
15919 ix86_expand_vec_set_builtin (tree arglist)
15921 enum machine_mode tmode, mode1;
15922 tree arg0, arg1, arg2;
15926 arg0 = TREE_VALUE (arglist);
15927 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15928 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15930 tmode = TYPE_MODE (TREE_TYPE (arg0));
15931 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15932 gcc_assert (VECTOR_MODE_P (tmode));
15934 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15935 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15936 elt = get_element_number (TREE_TYPE (arg0), arg2);
15938 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15939 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15941 op0 = force_reg (tmode, op0);
15942 op1 = force_reg (mode1, op1);
15944 ix86_expand_vector_set (true, op0, op1, elt);
15949 /* Expand an expression EXP that calls a built-in function,
15950 with result going to TARGET if that's convenient
15951 (and in mode MODE if that's convenient).
15952 SUBTARGET may be used as the target for computing one of EXP's operands.
15953 IGNORE is nonzero if the value is to be ignored. */
15956 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15957 enum machine_mode mode ATTRIBUTE_UNUSED,
15958 int ignore ATTRIBUTE_UNUSED)
15960 const struct builtin_description *d;
15962 enum insn_code icode;
15963 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15964 tree arglist = TREE_OPERAND (exp, 1);
15965 tree arg0, arg1, arg2;
15966 rtx op0, op1, op2, pat;
15967 enum machine_mode tmode, mode0, mode1, mode2;
15968 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15972 case IX86_BUILTIN_EMMS:
15973 emit_insn (gen_mmx_emms ());
15976 case IX86_BUILTIN_SFENCE:
15977 emit_insn (gen_sse_sfence ());
15980 case IX86_BUILTIN_MASKMOVQ:
15981 case IX86_BUILTIN_MASKMOVDQU:
15982 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15983 ? CODE_FOR_mmx_maskmovq
15984 : CODE_FOR_sse2_maskmovdqu);
15985 /* Note the arg order is different from the operand order. */
15986 arg1 = TREE_VALUE (arglist);
15987 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15988 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15989 op0 = expand_normal (arg0);
15990 op1 = expand_normal (arg1);
15991 op2 = expand_normal (arg2);
15992 mode0 = insn_data[icode].operand[0].mode;
15993 mode1 = insn_data[icode].operand[1].mode;
15994 mode2 = insn_data[icode].operand[2].mode;
15996 op0 = force_reg (Pmode, op0);
15997 op0 = gen_rtx_MEM (mode1, op0);
15999 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16000 op0 = copy_to_mode_reg (mode0, op0);
16001 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16002 op1 = copy_to_mode_reg (mode1, op1);
16003 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16004 op2 = copy_to_mode_reg (mode2, op2);
16005 pat = GEN_FCN (icode) (op0, op1, op2);
16011 case IX86_BUILTIN_SQRTSS:
16012 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16013 case IX86_BUILTIN_RSQRTSS:
16014 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16015 case IX86_BUILTIN_RCPSS:
16016 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16018 case IX86_BUILTIN_LOADUPS:
16019 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16021 case IX86_BUILTIN_STOREUPS:
16022 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16024 case IX86_BUILTIN_LOADHPS:
16025 case IX86_BUILTIN_LOADLPS:
16026 case IX86_BUILTIN_LOADHPD:
16027 case IX86_BUILTIN_LOADLPD:
16028 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16029 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16030 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16031 : CODE_FOR_sse2_loadlpd);
16032 arg0 = TREE_VALUE (arglist);
16033 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16034 op0 = expand_normal (arg0);
16035 op1 = expand_normal (arg1);
16036 tmode = insn_data[icode].operand[0].mode;
16037 mode0 = insn_data[icode].operand[1].mode;
16038 mode1 = insn_data[icode].operand[2].mode;
16040 op0 = force_reg (mode0, op0);
16041 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16042 if (optimize || target == 0
16043 || GET_MODE (target) != tmode
16044 || !register_operand (target, tmode))
16045 target = gen_reg_rtx (tmode);
16046 pat = GEN_FCN (icode) (target, op0, op1);
16052 case IX86_BUILTIN_STOREHPS:
16053 case IX86_BUILTIN_STORELPS:
16054 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16055 : CODE_FOR_sse_storelps);
16056 arg0 = TREE_VALUE (arglist);
16057 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16058 op0 = expand_normal (arg0);
16059 op1 = expand_normal (arg1);
16060 mode0 = insn_data[icode].operand[0].mode;
16061 mode1 = insn_data[icode].operand[1].mode;
16063 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16064 op1 = force_reg (mode1, op1);
16066 pat = GEN_FCN (icode) (op0, op1);
16072 case IX86_BUILTIN_MOVNTPS:
16073 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16074 case IX86_BUILTIN_MOVNTQ:
16075 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16077 case IX86_BUILTIN_LDMXCSR:
16078 op0 = expand_normal (TREE_VALUE (arglist));
16079 target = assign_386_stack_local (SImode, SLOT_TEMP);
16080 emit_move_insn (target, op0);
16081 emit_insn (gen_sse_ldmxcsr (target));
16084 case IX86_BUILTIN_STMXCSR:
16085 target = assign_386_stack_local (SImode, SLOT_TEMP);
16086 emit_insn (gen_sse_stmxcsr (target));
16087 return copy_to_mode_reg (SImode, target);
16089 case IX86_BUILTIN_SHUFPS:
16090 case IX86_BUILTIN_SHUFPD:
16091 icode = (fcode == IX86_BUILTIN_SHUFPS
16092 ? CODE_FOR_sse_shufps
16093 : CODE_FOR_sse2_shufpd);
16094 arg0 = TREE_VALUE (arglist);
16095 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16096 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16097 op0 = expand_normal (arg0);
16098 op1 = expand_normal (arg1);
16099 op2 = expand_normal (arg2);
16100 tmode = insn_data[icode].operand[0].mode;
16101 mode0 = insn_data[icode].operand[1].mode;
16102 mode1 = insn_data[icode].operand[2].mode;
16103 mode2 = insn_data[icode].operand[3].mode;
16105 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16106 op0 = copy_to_mode_reg (mode0, op0);
16107 if ((optimize && !register_operand (op1, mode1))
16108 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16109 op1 = copy_to_mode_reg (mode1, op1);
16110 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16112 /* @@@ better error message */
16113 error ("mask must be an immediate");
16114 return gen_reg_rtx (tmode);
16116 if (optimize || target == 0
16117 || GET_MODE (target) != tmode
16118 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16119 target = gen_reg_rtx (tmode);
16120 pat = GEN_FCN (icode) (target, op0, op1, op2);
16126 case IX86_BUILTIN_PSHUFW:
16127 case IX86_BUILTIN_PSHUFD:
16128 case IX86_BUILTIN_PSHUFHW:
16129 case IX86_BUILTIN_PSHUFLW:
16130 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16131 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16132 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16133 : CODE_FOR_mmx_pshufw);
16134 arg0 = TREE_VALUE (arglist);
16135 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16136 op0 = expand_normal (arg0);
16137 op1 = expand_normal (arg1);
16138 tmode = insn_data[icode].operand[0].mode;
16139 mode1 = insn_data[icode].operand[1].mode;
16140 mode2 = insn_data[icode].operand[2].mode;
16142 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16143 op0 = copy_to_mode_reg (mode1, op0);
16144 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16146 /* @@@ better error message */
16147 error ("mask must be an immediate");
16151 || GET_MODE (target) != tmode
16152 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16153 target = gen_reg_rtx (tmode);
16154 pat = GEN_FCN (icode) (target, op0, op1);
16160 case IX86_BUILTIN_PSLLDQI128:
16161 case IX86_BUILTIN_PSRLDQI128:
16162 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16163 : CODE_FOR_sse2_lshrti3);
16164 arg0 = TREE_VALUE (arglist);
16165 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16166 op0 = expand_normal (arg0);
16167 op1 = expand_normal (arg1);
16168 tmode = insn_data[icode].operand[0].mode;
16169 mode1 = insn_data[icode].operand[1].mode;
16170 mode2 = insn_data[icode].operand[2].mode;
16172 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16174 op0 = copy_to_reg (op0);
16175 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16177 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16179 error ("shift must be an immediate");
16182 target = gen_reg_rtx (V2DImode);
16183 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16189 case IX86_BUILTIN_FEMMS:
16190 emit_insn (gen_mmx_femms ());
16193 case IX86_BUILTIN_PAVGUSB:
16194 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16196 case IX86_BUILTIN_PF2ID:
16197 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16199 case IX86_BUILTIN_PFACC:
16200 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16202 case IX86_BUILTIN_PFADD:
16203 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16205 case IX86_BUILTIN_PFCMPEQ:
16206 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16208 case IX86_BUILTIN_PFCMPGE:
16209 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16211 case IX86_BUILTIN_PFCMPGT:
16212 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16214 case IX86_BUILTIN_PFMAX:
16215 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16217 case IX86_BUILTIN_PFMIN:
16218 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16220 case IX86_BUILTIN_PFMUL:
16221 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16223 case IX86_BUILTIN_PFRCP:
16224 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16226 case IX86_BUILTIN_PFRCPIT1:
16227 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16229 case IX86_BUILTIN_PFRCPIT2:
16230 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16232 case IX86_BUILTIN_PFRSQIT1:
16233 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16235 case IX86_BUILTIN_PFRSQRT:
16236 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16238 case IX86_BUILTIN_PFSUB:
16239 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16241 case IX86_BUILTIN_PFSUBR:
16242 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16244 case IX86_BUILTIN_PI2FD:
16245 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16247 case IX86_BUILTIN_PMULHRW:
16248 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16250 case IX86_BUILTIN_PF2IW:
16251 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16253 case IX86_BUILTIN_PFNACC:
16254 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16256 case IX86_BUILTIN_PFPNACC:
16257 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16259 case IX86_BUILTIN_PI2FW:
16260 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16262 case IX86_BUILTIN_PSWAPDSI:
16263 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16265 case IX86_BUILTIN_PSWAPDSF:
16266 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16268 case IX86_BUILTIN_SQRTSD:
16269 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16270 case IX86_BUILTIN_LOADUPD:
16271 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16272 case IX86_BUILTIN_STOREUPD:
16273 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16275 case IX86_BUILTIN_MFENCE:
16276 emit_insn (gen_sse2_mfence ());
16278 case IX86_BUILTIN_LFENCE:
16279 emit_insn (gen_sse2_lfence ());
16282 case IX86_BUILTIN_CLFLUSH:
16283 arg0 = TREE_VALUE (arglist);
16284 op0 = expand_normal (arg0);
16285 icode = CODE_FOR_sse2_clflush;
16286 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16287 op0 = copy_to_mode_reg (Pmode, op0);
16289 emit_insn (gen_sse2_clflush (op0));
16292 case IX86_BUILTIN_MOVNTPD:
16293 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16294 case IX86_BUILTIN_MOVNTDQ:
16295 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16296 case IX86_BUILTIN_MOVNTI:
16297 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16299 case IX86_BUILTIN_LOADDQU:
16300 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16301 case IX86_BUILTIN_STOREDQU:
16302 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16304 case IX86_BUILTIN_MONITOR:
16305 arg0 = TREE_VALUE (arglist);
16306 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16307 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16308 op0 = expand_normal (arg0);
16309 op1 = expand_normal (arg1);
16310 op2 = expand_normal (arg2);
16312 op0 = copy_to_mode_reg (Pmode, op0);
16314 op1 = copy_to_mode_reg (SImode, op1);
16316 op2 = copy_to_mode_reg (SImode, op2);
16318 emit_insn (gen_sse3_monitor (op0, op1, op2));
16320 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16323 case IX86_BUILTIN_MWAIT:
16324 arg0 = TREE_VALUE (arglist);
16325 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16326 op0 = expand_normal (arg0);
16327 op1 = expand_normal (arg1);
16329 op0 = copy_to_mode_reg (SImode, op0);
16331 op1 = copy_to_mode_reg (SImode, op1);
16332 emit_insn (gen_sse3_mwait (op0, op1));
16335 case IX86_BUILTIN_LDDQU:
16336 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16339 case IX86_BUILTIN_VEC_INIT_V2SI:
16340 case IX86_BUILTIN_VEC_INIT_V4HI:
16341 case IX86_BUILTIN_VEC_INIT_V8QI:
16342 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16344 case IX86_BUILTIN_VEC_EXT_V2DF:
16345 case IX86_BUILTIN_VEC_EXT_V2DI:
16346 case IX86_BUILTIN_VEC_EXT_V4SF:
16347 case IX86_BUILTIN_VEC_EXT_V4SI:
16348 case IX86_BUILTIN_VEC_EXT_V8HI:
16349 case IX86_BUILTIN_VEC_EXT_V2SI:
16350 case IX86_BUILTIN_VEC_EXT_V4HI:
16351 return ix86_expand_vec_ext_builtin (arglist, target);
16353 case IX86_BUILTIN_VEC_SET_V8HI:
16354 case IX86_BUILTIN_VEC_SET_V4HI:
16355 return ix86_expand_vec_set_builtin (arglist);
16361 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16362 if (d->code == fcode)
16364 /* Compares are treated specially. */
16365 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16366 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16367 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16368 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16369 return ix86_expand_sse_compare (d, arglist, target);
16371 return ix86_expand_binop_builtin (d->icode, arglist, target);
16374 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16375 if (d->code == fcode)
16376 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16378 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16379 if (d->code == fcode)
16380 return ix86_expand_sse_comi (d, arglist, target);
16382 gcc_unreachable ();
16385 /* Store OPERAND to the memory after reload is completed. This means
16386 that we can't easily use assign_stack_local. */
16388 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16392 gcc_assert (reload_completed);
16393 if (TARGET_RED_ZONE)
16395 result = gen_rtx_MEM (mode,
16396 gen_rtx_PLUS (Pmode,
16398 GEN_INT (-RED_ZONE_SIZE)));
16399 emit_move_insn (result, operand);
16401 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16407 operand = gen_lowpart (DImode, operand);
16411 gen_rtx_SET (VOIDmode,
16412 gen_rtx_MEM (DImode,
16413 gen_rtx_PRE_DEC (DImode,
16414 stack_pointer_rtx)),
16418 gcc_unreachable ();
16420 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16429 split_di (&operand, 1, operands, operands + 1);
16431 gen_rtx_SET (VOIDmode,
16432 gen_rtx_MEM (SImode,
16433 gen_rtx_PRE_DEC (Pmode,
16434 stack_pointer_rtx)),
16437 gen_rtx_SET (VOIDmode,
16438 gen_rtx_MEM (SImode,
16439 gen_rtx_PRE_DEC (Pmode,
16440 stack_pointer_rtx)),
16445 /* Store HImodes as SImodes. */
16446 operand = gen_lowpart (SImode, operand);
16450 gen_rtx_SET (VOIDmode,
16451 gen_rtx_MEM (GET_MODE (operand),
16452 gen_rtx_PRE_DEC (SImode,
16453 stack_pointer_rtx)),
16457 gcc_unreachable ();
16459 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16464 /* Free operand from the memory. */
16466 ix86_free_from_memory (enum machine_mode mode)
16468 if (!TARGET_RED_ZONE)
16472 if (mode == DImode || TARGET_64BIT)
16476 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16477 to pop or add instruction if registers are available. */
16478 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16479 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16484 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16485 QImode must go into class Q_REGS.
16486 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16487 movdf to do mem-to-mem moves through integer regs. */
16489 ix86_preferred_reload_class (rtx x, enum reg_class class)
16491 enum machine_mode mode = GET_MODE (x);
16493 /* We're only allowed to return a subclass of CLASS. Many of the
16494 following checks fail for NO_REGS, so eliminate that early. */
16495 if (class == NO_REGS)
16498 /* All classes can load zeros. */
16499 if (x == CONST0_RTX (mode))
16502 /* Force constants into memory if we are loading a (nonzero) constant into
16503 an MMX or SSE register. This is because there are no MMX/SSE instructions
16504 to load from a constant. */
16506 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16509 /* Prefer SSE regs only, if we can use them for math. */
16510 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16511 return SSE_CLASS_P (class) ? class : NO_REGS;
16513 /* Floating-point constants need more complex checks. */
16514 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16516 /* General regs can load everything. */
16517 if (reg_class_subset_p (class, GENERAL_REGS))
16520 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16521 zero above. We only want to wind up preferring 80387 registers if
16522 we plan on doing computation with them. */
16524 && standard_80387_constant_p (x))
16526 /* Limit class to non-sse. */
16527 if (class == FLOAT_SSE_REGS)
16529 if (class == FP_TOP_SSE_REGS)
16531 if (class == FP_SECOND_SSE_REGS)
16532 return FP_SECOND_REG;
16533 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16540 /* Generally when we see PLUS here, it's the function invariant
16541 (plus soft-fp const_int). Which can only be computed into general
16543 if (GET_CODE (x) == PLUS)
16544 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16546 /* QImode constants are easy to load, but non-constant QImode data
16547 must go into Q_REGS. */
16548 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16550 if (reg_class_subset_p (class, Q_REGS))
16552 if (reg_class_subset_p (Q_REGS, class))
16560 /* Discourage putting floating-point values in SSE registers unless
16561 SSE math is being used, and likewise for the 387 registers. */
16563 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16565 enum machine_mode mode = GET_MODE (x);
16567 /* Restrict the output reload class to the register bank that we are doing
16568 math on. If we would like not to return a subset of CLASS, reject this
16569 alternative: if reload cannot do this, it will still use its choice. */
16570 mode = GET_MODE (x);
16571 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16572 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16574 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16576 if (class == FP_TOP_SSE_REGS)
16578 else if (class == FP_SECOND_SSE_REGS)
16579 return FP_SECOND_REG;
16581 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16587 /* If we are copying between general and FP registers, we need a memory
16588 location. The same is true for SSE and MMX registers.
16590 The macro can't work reliably when one of the CLASSES is class containing
16591 registers from multiple units (SSE, MMX, integer). We avoid this by never
16592 combining those units in single alternative in the machine description.
16593 Ensure that this constraint holds to avoid unexpected surprises.
16595 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16596 enforce these sanity checks. */
16599 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16600 enum machine_mode mode, int strict)
16602 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16603 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16604 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16605 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16606 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16607 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16609 gcc_assert (!strict);
16613 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16616 /* ??? This is a lie. We do have moves between mmx/general, and for
16617 mmx/sse2. But by saying we need secondary memory we discourage the
16618 register allocator from using the mmx registers unless needed. */
16619 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16622 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16624 /* SSE1 doesn't have any direct moves from other classes. */
16628 /* If the target says that inter-unit moves are more expensive
16629 than moving through memory, then don't generate them. */
16630 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16633 /* Between SSE and general, we have moves no larger than word size. */
16634 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16637 /* ??? For the cost of one register reformat penalty, we could use
16638 the same instructions to move SFmode and DFmode data, but the
16639 relevant move patterns don't support those alternatives. */
16640 if (mode == SFmode || mode == DFmode)
16647 /* Return true if the registers in CLASS cannot represent the change from
16648 modes FROM to TO. */
16651 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16652 enum reg_class class)
16657 /* x87 registers can't do subreg at all, as all values are reformatted
16658 to extended precision. */
16659 if (MAYBE_FLOAT_CLASS_P (class))
16662 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16664 /* Vector registers do not support QI or HImode loads. If we don't
16665 disallow a change to these modes, reload will assume it's ok to
16666 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16667 the vec_dupv4hi pattern. */
16668 if (GET_MODE_SIZE (from) < 4)
16671 /* Vector registers do not support subreg with nonzero offsets, which
16672 are otherwise valid for integer registers. Since we can't see
16673 whether we have a nonzero offset from here, prohibit all
16674 nonparadoxical subregs changing size. */
16675 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16682 /* Return the cost of moving data from a register in class CLASS1 to
16683 one in class CLASS2.
16685 It is not required that the cost always equal 2 when FROM is the same as TO;
16686 on some machines it is expensive to move between registers if they are not
16687 general registers. */
16690 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16691 enum reg_class class2)
16693 /* In case we require secondary memory, compute cost of the store followed
16694 by load. In order to avoid bad register allocation choices, we need
16695 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16697 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16701 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16702 MEMORY_MOVE_COST (mode, class1, 1));
16703 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16704 MEMORY_MOVE_COST (mode, class2, 1));
16706 /* In case of copying from general_purpose_register we may emit multiple
16707 stores followed by single load causing memory size mismatch stall.
16708 Count this as arbitrarily high cost of 20. */
16709 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16712 /* In the case of FP/MMX moves, the registers actually overlap, and we
16713 have to switch modes in order to treat them differently. */
16714 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16715 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16721 /* Moves between SSE/MMX and integer unit are expensive. */
16722 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16723 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16724 return ix86_cost->mmxsse_to_integer;
16725 if (MAYBE_FLOAT_CLASS_P (class1))
16726 return ix86_cost->fp_move;
16727 if (MAYBE_SSE_CLASS_P (class1))
16728 return ix86_cost->sse_move;
16729 if (MAYBE_MMX_CLASS_P (class1))
16730 return ix86_cost->mmx_move;
16734 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16737 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16739 /* Flags and only flags can only hold CCmode values. */
16740 if (CC_REGNO_P (regno))
16741 return GET_MODE_CLASS (mode) == MODE_CC;
16742 if (GET_MODE_CLASS (mode) == MODE_CC
16743 || GET_MODE_CLASS (mode) == MODE_RANDOM
16744 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16746 if (FP_REGNO_P (regno))
16747 return VALID_FP_MODE_P (mode);
16748 if (SSE_REGNO_P (regno))
16750 /* We implement the move patterns for all vector modes into and
16751 out of SSE registers, even when no operation instructions
16753 return (VALID_SSE_REG_MODE (mode)
16754 || VALID_SSE2_REG_MODE (mode)
16755 || VALID_MMX_REG_MODE (mode)
16756 || VALID_MMX_REG_MODE_3DNOW (mode));
16758 if (MMX_REGNO_P (regno))
16760 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16761 so if the register is available at all, then we can move data of
16762 the given mode into or out of it. */
16763 return (VALID_MMX_REG_MODE (mode)
16764 || VALID_MMX_REG_MODE_3DNOW (mode));
16767 if (mode == QImode)
16769 /* Take care for QImode values - they can be in non-QI regs,
16770 but then they do cause partial register stalls. */
16771 if (regno < 4 || TARGET_64BIT)
16773 if (!TARGET_PARTIAL_REG_STALL)
16775 return reload_in_progress || reload_completed;
16777 /* We handle both integer and floats in the general purpose registers. */
16778 else if (VALID_INT_MODE_P (mode))
16780 else if (VALID_FP_MODE_P (mode))
16782 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16783 on to use that value in smaller contexts, this can easily force a
16784 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16785 supporting DImode, allow it. */
16786 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16792 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16793 tieable integer mode. */
16796 ix86_tieable_integer_mode_p (enum machine_mode mode)
16805 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16808 return TARGET_64BIT;
16815 /* Return true if MODE1 is accessible in a register that can hold MODE2
16816 without copying. That is, all register classes that can hold MODE2
16817 can also hold MODE1. */
16820 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16822 if (mode1 == mode2)
16825 if (ix86_tieable_integer_mode_p (mode1)
16826 && ix86_tieable_integer_mode_p (mode2))
16829 /* MODE2 being XFmode implies fp stack or general regs, which means we
16830 can tie any smaller floating point modes to it. Note that we do not
16831 tie this with TFmode. */
16832 if (mode2 == XFmode)
16833 return mode1 == SFmode || mode1 == DFmode;
16835 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16836 that we can tie it with SFmode. */
16837 if (mode2 == DFmode)
16838 return mode1 == SFmode;
16840 /* If MODE2 is only appropriate for an SSE register, then tie with
16841 any other mode acceptable to SSE registers. */
16842 if (GET_MODE_SIZE (mode2) >= 8
16843 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16844 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16846 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16847 with any other mode acceptable to MMX registers. */
16848 if (GET_MODE_SIZE (mode2) == 8
16849 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16850 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16855 /* Return the cost of moving data of mode M between a
16856 register and memory. A value of 2 is the default; this cost is
16857 relative to those in `REGISTER_MOVE_COST'.
16859 If moving between registers and memory is more expensive than
16860 between two registers, you should define this macro to express the
16863 Model also increased moving costs of QImode registers in non
16867 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16869 if (FLOAT_CLASS_P (class))
16886 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16888 if (SSE_CLASS_P (class))
16891 switch (GET_MODE_SIZE (mode))
16905 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16907 if (MMX_CLASS_P (class))
16910 switch (GET_MODE_SIZE (mode))
16921 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16923 switch (GET_MODE_SIZE (mode))
16927 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16928 : ix86_cost->movzbl_load);
16930 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16931 : ix86_cost->int_store[0] + 4);
16934 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16936 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16937 if (mode == TFmode)
16939 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16940 * (((int) GET_MODE_SIZE (mode)
16941 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16945 /* Compute a (partial) cost for rtx X. Return true if the complete
16946 cost has been computed, and false if subexpressions should be
16947 scanned. In either case, *TOTAL contains the cost result. */
16950 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16952 enum machine_mode mode = GET_MODE (x);
16960 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16962 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16964 else if (flag_pic && SYMBOLIC_CONST (x)
16966 || (!GET_CODE (x) != LABEL_REF
16967 && (GET_CODE (x) != SYMBOL_REF
16968 || !SYMBOL_REF_LOCAL_P (x)))))
16975 if (mode == VOIDmode)
16978 switch (standard_80387_constant_p (x))
16983 default: /* Other constants */
16988 /* Start with (MEM (SYMBOL_REF)), since that's where
16989 it'll probably end up. Add a penalty for size. */
16990 *total = (COSTS_N_INSNS (1)
16991 + (flag_pic != 0 && !TARGET_64BIT)
16992 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16998 /* The zero extensions is often completely free on x86_64, so make
16999 it as cheap as possible. */
17000 if (TARGET_64BIT && mode == DImode
17001 && GET_MODE (XEXP (x, 0)) == SImode)
17003 else if (TARGET_ZERO_EXTEND_WITH_AND)
17004 *total = ix86_cost->add;
17006 *total = ix86_cost->movzx;
17010 *total = ix86_cost->movsx;
17014 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17015 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17017 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17020 *total = ix86_cost->add;
17023 if ((value == 2 || value == 3)
17024 && ix86_cost->lea <= ix86_cost->shift_const)
17026 *total = ix86_cost->lea;
17036 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17038 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17040 if (INTVAL (XEXP (x, 1)) > 32)
17041 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17043 *total = ix86_cost->shift_const * 2;
17047 if (GET_CODE (XEXP (x, 1)) == AND)
17048 *total = ix86_cost->shift_var * 2;
17050 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17055 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17056 *total = ix86_cost->shift_const;
17058 *total = ix86_cost->shift_var;
17063 if (FLOAT_MODE_P (mode))
17065 *total = ix86_cost->fmul;
17070 rtx op0 = XEXP (x, 0);
17071 rtx op1 = XEXP (x, 1);
17073 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17075 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17076 for (nbits = 0; value != 0; value &= value - 1)
17080 /* This is arbitrary. */
17083 /* Compute costs correctly for widening multiplication. */
17084 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17085 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17086 == GET_MODE_SIZE (mode))
17088 int is_mulwiden = 0;
17089 enum machine_mode inner_mode = GET_MODE (op0);
17091 if (GET_CODE (op0) == GET_CODE (op1))
17092 is_mulwiden = 1, op1 = XEXP (op1, 0);
17093 else if (GET_CODE (op1) == CONST_INT)
17095 if (GET_CODE (op0) == SIGN_EXTEND)
17096 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17099 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17103 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17106 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17107 + nbits * ix86_cost->mult_bit
17108 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17117 if (FLOAT_MODE_P (mode))
17118 *total = ix86_cost->fdiv;
17120 *total = ix86_cost->divide[MODE_INDEX (mode)];
17124 if (FLOAT_MODE_P (mode))
17125 *total = ix86_cost->fadd;
17126 else if (GET_MODE_CLASS (mode) == MODE_INT
17127 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17129 if (GET_CODE (XEXP (x, 0)) == PLUS
17130 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17131 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17132 && CONSTANT_P (XEXP (x, 1)))
17134 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17135 if (val == 2 || val == 4 || val == 8)
17137 *total = ix86_cost->lea;
17138 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17139 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17141 *total += rtx_cost (XEXP (x, 1), outer_code);
17145 else if (GET_CODE (XEXP (x, 0)) == MULT
17146 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17148 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17149 if (val == 2 || val == 4 || val == 8)
17151 *total = ix86_cost->lea;
17152 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17153 *total += rtx_cost (XEXP (x, 1), outer_code);
17157 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17159 *total = ix86_cost->lea;
17160 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17161 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17162 *total += rtx_cost (XEXP (x, 1), outer_code);
17169 if (FLOAT_MODE_P (mode))
17171 *total = ix86_cost->fadd;
17179 if (!TARGET_64BIT && mode == DImode)
17181 *total = (ix86_cost->add * 2
17182 + (rtx_cost (XEXP (x, 0), outer_code)
17183 << (GET_MODE (XEXP (x, 0)) != DImode))
17184 + (rtx_cost (XEXP (x, 1), outer_code)
17185 << (GET_MODE (XEXP (x, 1)) != DImode)));
17191 if (FLOAT_MODE_P (mode))
17193 *total = ix86_cost->fchs;
17199 if (!TARGET_64BIT && mode == DImode)
17200 *total = ix86_cost->add * 2;
17202 *total = ix86_cost->add;
17206 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17207 && XEXP (XEXP (x, 0), 1) == const1_rtx
17208 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17209 && XEXP (x, 1) == const0_rtx)
17211 /* This kind of construct is implemented using test[bwl].
17212 Treat it as if we had an AND. */
17213 *total = (ix86_cost->add
17214 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17215 + rtx_cost (const1_rtx, outer_code));
17221 if (!TARGET_SSE_MATH
17223 || (mode == DFmode && !TARGET_SSE2))
17224 /* For standard 80387 constants, raise the cost to prevent
17225 compress_float_constant() to generate load from memory. */
17226 switch (standard_80387_constant_p (XEXP (x, 0)))
17236 *total = (x86_ext_80387_constants & TUNEMASK
17243 if (FLOAT_MODE_P (mode))
17244 *total = ix86_cost->fabs;
17248 if (FLOAT_MODE_P (mode))
17249 *total = ix86_cost->fsqrt;
17253 if (XINT (x, 1) == UNSPEC_TP)
17264 static int current_machopic_label_num;
17266 /* Given a symbol name and its associated stub, write out the
17267 definition of the stub. */
17270 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17272 unsigned int length;
17273 char *binder_name, *symbol_name, lazy_ptr_name[32];
17274 int label = ++current_machopic_label_num;
17276 /* For 64-bit we shouldn't get here. */
17277 gcc_assert (!TARGET_64BIT);
17279 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17280 symb = (*targetm.strip_name_encoding) (symb);
17282 length = strlen (stub);
17283 binder_name = alloca (length + 32);
17284 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17286 length = strlen (symb);
17287 symbol_name = alloca (length + 32);
17288 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17290 sprintf (lazy_ptr_name, "L%d$lz", label);
17293 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17295 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17297 fprintf (file, "%s:\n", stub);
17298 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17302 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17303 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17304 fprintf (file, "\tjmp\t*%%edx\n");
17307 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17309 fprintf (file, "%s:\n", binder_name);
17313 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17314 fprintf (file, "\tpushl\t%%eax\n");
17317 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17319 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17321 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17322 fprintf (file, "%s:\n", lazy_ptr_name);
17323 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17324 fprintf (file, "\t.long %s\n", binder_name);
17328 darwin_x86_file_end (void)
17330 darwin_file_end ();
17333 #endif /* TARGET_MACHO */
17335 /* Order the registers for register allocator. */
17338 x86_order_regs_for_local_alloc (void)
17343 /* First allocate the local general purpose registers. */
17344 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17345 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17346 reg_alloc_order [pos++] = i;
17348 /* Global general purpose registers. */
17349 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17350 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17351 reg_alloc_order [pos++] = i;
17353 /* x87 registers come first in case we are doing FP math
17355 if (!TARGET_SSE_MATH)
17356 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17357 reg_alloc_order [pos++] = i;
17359 /* SSE registers. */
17360 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17361 reg_alloc_order [pos++] = i;
17362 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17363 reg_alloc_order [pos++] = i;
17365 /* x87 registers. */
17366 if (TARGET_SSE_MATH)
17367 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17368 reg_alloc_order [pos++] = i;
17370 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17371 reg_alloc_order [pos++] = i;
17373 /* Initialize the rest of array as we do not allocate some registers
17375 while (pos < FIRST_PSEUDO_REGISTER)
17376 reg_alloc_order [pos++] = 0;
17379 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17380 struct attribute_spec.handler. */
17382 ix86_handle_struct_attribute (tree *node, tree name,
17383 tree args ATTRIBUTE_UNUSED,
17384 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17387 if (DECL_P (*node))
17389 if (TREE_CODE (*node) == TYPE_DECL)
17390 type = &TREE_TYPE (*node);
17395 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17396 || TREE_CODE (*type) == UNION_TYPE)))
17398 warning (OPT_Wattributes, "%qs attribute ignored",
17399 IDENTIFIER_POINTER (name));
17400 *no_add_attrs = true;
17403 else if ((is_attribute_p ("ms_struct", name)
17404 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17405 || ((is_attribute_p ("gcc_struct", name)
17406 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17408 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17409 IDENTIFIER_POINTER (name));
17410 *no_add_attrs = true;
17417 ix86_ms_bitfield_layout_p (tree record_type)
17419 return (TARGET_MS_BITFIELD_LAYOUT &&
17420 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17421 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17424 /* Returns an expression indicating where the this parameter is
17425 located on entry to the FUNCTION. */
17428 x86_this_parameter (tree function)
17430 tree type = TREE_TYPE (function);
17434 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17435 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17438 if (ix86_function_regparm (type, function) > 0)
17442 parm = TYPE_ARG_TYPES (type);
17443 /* Figure out whether or not the function has a variable number of
17445 for (; parm; parm = TREE_CHAIN (parm))
17446 if (TREE_VALUE (parm) == void_type_node)
17448 /* If not, the this parameter is in the first argument. */
17452 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17454 return gen_rtx_REG (SImode, regno);
17458 if (aggregate_value_p (TREE_TYPE (type), type))
17459 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17461 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17464 /* Determine whether x86_output_mi_thunk can succeed. */
17467 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17468 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17469 HOST_WIDE_INT vcall_offset, tree function)
17471 /* 64-bit can handle anything. */
17475 /* For 32-bit, everything's fine if we have one free register. */
17476 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17479 /* Need a free register for vcall_offset. */
17483 /* Need a free register for GOT references. */
17484 if (flag_pic && !(*targetm.binds_local_p) (function))
17487 /* Otherwise ok. */
17491 /* Output the assembler code for a thunk function. THUNK_DECL is the
17492 declaration for the thunk function itself, FUNCTION is the decl for
17493 the target function. DELTA is an immediate constant offset to be
17494 added to THIS. If VCALL_OFFSET is nonzero, the word at
17495 *(*this + vcall_offset) should be added to THIS. */
17498 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17499 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17500 HOST_WIDE_INT vcall_offset, tree function)
17503 rtx this = x86_this_parameter (function);
17506 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17507 pull it in now and let DELTA benefit. */
17510 else if (vcall_offset)
17512 /* Put the this parameter into %eax. */
17514 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17515 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17518 this_reg = NULL_RTX;
17520 /* Adjust the this parameter by a fixed constant. */
17523 xops[0] = GEN_INT (delta);
17524 xops[1] = this_reg ? this_reg : this;
17527 if (!x86_64_general_operand (xops[0], DImode))
17529 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17531 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17535 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17538 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17541 /* Adjust the this parameter by a value stored in the vtable. */
17545 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17548 int tmp_regno = 2 /* ECX */;
17549 if (lookup_attribute ("fastcall",
17550 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17551 tmp_regno = 0 /* EAX */;
17552 tmp = gen_rtx_REG (SImode, tmp_regno);
17555 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17558 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17560 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17562 /* Adjust the this parameter. */
17563 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17564 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17566 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17567 xops[0] = GEN_INT (vcall_offset);
17569 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17570 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17572 xops[1] = this_reg;
17574 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17576 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17579 /* If necessary, drop THIS back to its stack slot. */
17580 if (this_reg && this_reg != this)
17582 xops[0] = this_reg;
17584 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17587 xops[0] = XEXP (DECL_RTL (function), 0);
17590 if (!flag_pic || (*targetm.binds_local_p) (function))
17591 output_asm_insn ("jmp\t%P0", xops);
17594 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17595 tmp = gen_rtx_CONST (Pmode, tmp);
17596 tmp = gen_rtx_MEM (QImode, tmp);
17598 output_asm_insn ("jmp\t%A0", xops);
17603 if (!flag_pic || (*targetm.binds_local_p) (function))
17604 output_asm_insn ("jmp\t%P0", xops);
17609 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17610 tmp = (gen_rtx_SYMBOL_REF
17612 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17613 tmp = gen_rtx_MEM (QImode, tmp);
17615 output_asm_insn ("jmp\t%0", xops);
17618 #endif /* TARGET_MACHO */
17620 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17621 output_set_got (tmp, NULL_RTX);
17624 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17625 output_asm_insn ("jmp\t{*}%1", xops);
17631 x86_file_start (void)
17633 default_file_start ();
17635 darwin_file_start ();
17637 if (X86_FILE_START_VERSION_DIRECTIVE)
17638 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17639 if (X86_FILE_START_FLTUSED)
17640 fputs ("\t.global\t__fltused\n", asm_out_file);
17641 if (ix86_asm_dialect == ASM_INTEL)
17642 fputs ("\t.intel_syntax\n", asm_out_file);
17646 x86_field_alignment (tree field, int computed)
17648 enum machine_mode mode;
17649 tree type = TREE_TYPE (field);
17651 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17653 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17654 ? get_inner_array_type (type) : type);
17655 if (mode == DFmode || mode == DCmode
17656 || GET_MODE_CLASS (mode) == MODE_INT
17657 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17658 return MIN (32, computed);
17662 /* Output assembler code to FILE to increment profiler label # LABELNO
17663 for profiling a function entry. */
17665 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17670 #ifndef NO_PROFILE_COUNTERS
17671 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17673 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17677 #ifndef NO_PROFILE_COUNTERS
17678 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17680 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17684 #ifndef NO_PROFILE_COUNTERS
17685 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17686 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17688 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17692 #ifndef NO_PROFILE_COUNTERS
17693 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17694 PROFILE_COUNT_REGISTER);
17696 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17700 /* We don't have exact information about the insn sizes, but we may assume
17701 quite safely that we are informed about all 1 byte insns and memory
17702 address sizes. This is enough to eliminate unnecessary padding in
17706 min_insn_size (rtx insn)
17710 if (!INSN_P (insn) || !active_insn_p (insn))
17713 /* Discard alignments we've emit and jump instructions. */
17714 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17715 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17717 if (GET_CODE (insn) == JUMP_INSN
17718 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17719 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17722 /* Important case - calls are always 5 bytes.
17723 It is common to have many calls in the row. */
17724 if (GET_CODE (insn) == CALL_INSN
17725 && symbolic_reference_mentioned_p (PATTERN (insn))
17726 && !SIBLING_CALL_P (insn))
17728 if (get_attr_length (insn) <= 1)
17731 /* For normal instructions we may rely on the sizes of addresses
17732 and the presence of symbol to require 4 bytes of encoding.
17733 This is not the case for jumps where references are PC relative. */
17734 if (GET_CODE (insn) != JUMP_INSN)
17736 l = get_attr_length_address (insn);
17737 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17746 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
17750 ix86_avoid_jump_misspredicts (void)
17752 rtx insn, start = get_insns ();
17753 int nbytes = 0, njumps = 0;
17756 /* Look for all minimal intervals of instructions containing 4 jumps.
17757 The intervals are bounded by START and INSN. NBYTES is the total
17758 size of instructions in the interval including INSN and not including
17759 START. When the NBYTES is smaller than 16 bytes, it is possible
17760 that the end of START and INSN ends up in the same 16byte page.
17762 The smallest offset in the page INSN can start is the case where START
17763 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17764 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17766 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17769 nbytes += min_insn_size (insn);
17771 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17772 INSN_UID (insn), min_insn_size (insn));
17773 if ((GET_CODE (insn) == JUMP_INSN
17774 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17775 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17776 || GET_CODE (insn) == CALL_INSN)
17783 start = NEXT_INSN (start);
17784 if ((GET_CODE (start) == JUMP_INSN
17785 && GET_CODE (PATTERN (start)) != ADDR_VEC
17786 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17787 || GET_CODE (start) == CALL_INSN)
17788 njumps--, isjump = 1;
17791 nbytes -= min_insn_size (start);
17793 gcc_assert (njumps >= 0);
17795 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17796 INSN_UID (start), INSN_UID (insn), nbytes);
17798 if (njumps == 3 && isjump && nbytes < 16)
17800 int padsize = 15 - nbytes + min_insn_size (insn);
17803 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17804 INSN_UID (insn), padsize);
17805 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17810 /* AMD Athlon works faster
17811 when RET is not destination of conditional jump or directly preceded
17812 by other jump instruction. We avoid the penalty by inserting NOP just
17813 before the RET instructions in such cases. */
17815 ix86_pad_returns (void)
17820 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17822 basic_block bb = e->src;
17823 rtx ret = BB_END (bb);
17825 bool replace = false;
17827 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17828 || !maybe_hot_bb_p (bb))
17830 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17831 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17833 if (prev && GET_CODE (prev) == CODE_LABEL)
17838 FOR_EACH_EDGE (e, ei, bb->preds)
17839 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17840 && !(e->flags & EDGE_FALLTHRU))
17845 prev = prev_active_insn (ret);
17847 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17848 || GET_CODE (prev) == CALL_INSN))
17850 /* Empty functions get branch mispredict even when the jump destination
17851 is not visible to us. */
17852 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17857 emit_insn_before (gen_return_internal_long (), ret);
17863 /* Implement machine specific optimizations. We implement padding of returns
17864 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
17868 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17869 ix86_pad_returns ();
17870 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17871 ix86_avoid_jump_misspredicts ();
17874 /* Return nonzero when QImode register that must be represented via REX prefix
17877 x86_extended_QIreg_mentioned_p (rtx insn)
17880 extract_insn_cached (insn);
17881 for (i = 0; i < recog_data.n_operands; i++)
17882 if (REG_P (recog_data.operand[i])
17883 && REGNO (recog_data.operand[i]) >= 4)
17888 /* Return nonzero when P points to register encoded via REX prefix.
17889 Called via for_each_rtx. */
17891 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17893 unsigned int regno;
17896 regno = REGNO (*p);
17897 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17900 /* Return true when INSN mentions register that must be encoded using REX
17903 x86_extended_reg_mentioned_p (rtx insn)
17905 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17908 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17909 optabs would emit if we didn't have TFmode patterns. */
17912 x86_emit_floatuns (rtx operands[2])
17914 rtx neglab, donelab, i0, i1, f0, in, out;
17915 enum machine_mode mode, inmode;
17917 inmode = GET_MODE (operands[1]);
17918 gcc_assert (inmode == SImode || inmode == DImode);
17921 in = force_reg (inmode, operands[1]);
17922 mode = GET_MODE (out);
17923 neglab = gen_label_rtx ();
17924 donelab = gen_label_rtx ();
17925 i1 = gen_reg_rtx (Pmode);
17926 f0 = gen_reg_rtx (mode);
17928 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17930 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17931 emit_jump_insn (gen_jump (donelab));
17934 emit_label (neglab);
17936 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17937 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17938 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17939 expand_float (f0, i0, 0);
17940 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17942 emit_label (donelab);
17945 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17946 with all elements equal to VAR. Return true if successful. */
17949 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17950 rtx target, rtx val)
17952 enum machine_mode smode, wsmode, wvmode;
17967 val = force_reg (GET_MODE_INNER (mode), val);
17968 x = gen_rtx_VEC_DUPLICATE (mode, val);
17969 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17975 if (TARGET_SSE || TARGET_3DNOW_A)
17977 val = gen_lowpart (SImode, val);
17978 x = gen_rtx_TRUNCATE (HImode, val);
17979 x = gen_rtx_VEC_DUPLICATE (mode, x);
17980 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18002 /* Extend HImode to SImode using a paradoxical SUBREG. */
18003 tmp1 = gen_reg_rtx (SImode);
18004 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18005 /* Insert the SImode value as low element of V4SImode vector. */
18006 tmp2 = gen_reg_rtx (V4SImode);
18007 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18008 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18009 CONST0_RTX (V4SImode),
18011 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18012 /* Cast the V4SImode vector back to a V8HImode vector. */
18013 tmp1 = gen_reg_rtx (V8HImode);
18014 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18015 /* Duplicate the low short through the whole low SImode word. */
18016 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18017 /* Cast the V8HImode vector back to a V4SImode vector. */
18018 tmp2 = gen_reg_rtx (V4SImode);
18019 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18020 /* Replicate the low element of the V4SImode vector. */
18021 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18022 /* Cast the V2SImode back to V8HImode, and store in target. */
18023 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18034 /* Extend QImode to SImode using a paradoxical SUBREG. */
18035 tmp1 = gen_reg_rtx (SImode);
18036 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18037 /* Insert the SImode value as low element of V4SImode vector. */
18038 tmp2 = gen_reg_rtx (V4SImode);
18039 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18040 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18041 CONST0_RTX (V4SImode),
18043 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18044 /* Cast the V4SImode vector back to a V16QImode vector. */
18045 tmp1 = gen_reg_rtx (V16QImode);
18046 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18047 /* Duplicate the low byte through the whole low SImode word. */
18048 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18049 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18050 /* Cast the V16QImode vector back to a V4SImode vector. */
18051 tmp2 = gen_reg_rtx (V4SImode);
18052 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18053 /* Replicate the low element of the V4SImode vector. */
18054 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18055 /* Cast the V2SImode back to V16QImode, and store in target. */
18056 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18064 /* Replicate the value once into the next wider mode and recurse. */
18065 val = convert_modes (wsmode, smode, val, true);
18066 x = expand_simple_binop (wsmode, ASHIFT, val,
18067 GEN_INT (GET_MODE_BITSIZE (smode)),
18068 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18069 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18071 x = gen_reg_rtx (wvmode);
18072 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18073 gcc_unreachable ();
18074 emit_move_insn (target, gen_lowpart (mode, x));
18082 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18083 whose ONE_VAR element is VAR, and other elements are zero. Return true
18087 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18088 rtx target, rtx var, int one_var)
18090 enum machine_mode vsimode;
18106 var = force_reg (GET_MODE_INNER (mode), var);
18107 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18108 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18113 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18114 new_target = gen_reg_rtx (mode);
18116 new_target = target;
18117 var = force_reg (GET_MODE_INNER (mode), var);
18118 x = gen_rtx_VEC_DUPLICATE (mode, var);
18119 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18120 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18123 /* We need to shuffle the value to the correct position, so
18124 create a new pseudo to store the intermediate result. */
18126 /* With SSE2, we can use the integer shuffle insns. */
18127 if (mode != V4SFmode && TARGET_SSE2)
18129 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18131 GEN_INT (one_var == 1 ? 0 : 1),
18132 GEN_INT (one_var == 2 ? 0 : 1),
18133 GEN_INT (one_var == 3 ? 0 : 1)));
18134 if (target != new_target)
18135 emit_move_insn (target, new_target);
18139 /* Otherwise convert the intermediate result to V4SFmode and
18140 use the SSE1 shuffle instructions. */
18141 if (mode != V4SFmode)
18143 tmp = gen_reg_rtx (V4SFmode);
18144 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18149 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18151 GEN_INT (one_var == 1 ? 0 : 1),
18152 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18153 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18155 if (mode != V4SFmode)
18156 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18157 else if (tmp != target)
18158 emit_move_insn (target, tmp);
18160 else if (target != new_target)
18161 emit_move_insn (target, new_target);
18166 vsimode = V4SImode;
18172 vsimode = V2SImode;
18178 /* Zero extend the variable element to SImode and recurse. */
18179 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18181 x = gen_reg_rtx (vsimode);
18182 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18184 gcc_unreachable ();
18186 emit_move_insn (target, gen_lowpart (mode, x));
18194 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18195 consisting of the values in VALS. It is known that all elements
18196 except ONE_VAR are constants. Return true if successful. */
18199 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18200 rtx target, rtx vals, int one_var)
18202 rtx var = XVECEXP (vals, 0, one_var);
18203 enum machine_mode wmode;
18206 const_vec = copy_rtx (vals);
18207 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18208 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18216 /* For the two element vectors, it's just as easy to use
18217 the general case. */
18233 /* There's no way to set one QImode entry easily. Combine
18234 the variable value with its adjacent constant value, and
18235 promote to an HImode set. */
18236 x = XVECEXP (vals, 0, one_var ^ 1);
18239 var = convert_modes (HImode, QImode, var, true);
18240 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18241 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18242 x = GEN_INT (INTVAL (x) & 0xff);
18246 var = convert_modes (HImode, QImode, var, true);
18247 x = gen_int_mode (INTVAL (x) << 8, HImode);
18249 if (x != const0_rtx)
18250 var = expand_simple_binop (HImode, IOR, var, x, var,
18251 1, OPTAB_LIB_WIDEN);
18253 x = gen_reg_rtx (wmode);
18254 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18255 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18257 emit_move_insn (target, gen_lowpart (mode, x));
18264 emit_move_insn (target, const_vec);
18265 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18269 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18270 all values variable, and none identical. */
18273 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18274 rtx target, rtx vals)
18276 enum machine_mode half_mode = GET_MODE_INNER (mode);
18277 rtx op0 = NULL, op1 = NULL;
18278 bool use_vec_concat = false;
18284 if (!mmx_ok && !TARGET_SSE)
18290 /* For the two element vectors, we always implement VEC_CONCAT. */
18291 op0 = XVECEXP (vals, 0, 0);
18292 op1 = XVECEXP (vals, 0, 1);
18293 use_vec_concat = true;
18297 half_mode = V2SFmode;
18300 half_mode = V2SImode;
18306 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18307 Recurse to load the two halves. */
18309 op0 = gen_reg_rtx (half_mode);
18310 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18311 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18313 op1 = gen_reg_rtx (half_mode);
18314 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18315 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18317 use_vec_concat = true;
18328 gcc_unreachable ();
18331 if (use_vec_concat)
18333 if (!register_operand (op0, half_mode))
18334 op0 = force_reg (half_mode, op0);
18335 if (!register_operand (op1, half_mode))
18336 op1 = force_reg (half_mode, op1);
18338 emit_insn (gen_rtx_SET (VOIDmode, target,
18339 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18343 int i, j, n_elts, n_words, n_elt_per_word;
18344 enum machine_mode inner_mode;
18345 rtx words[4], shift;
18347 inner_mode = GET_MODE_INNER (mode);
18348 n_elts = GET_MODE_NUNITS (mode);
18349 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18350 n_elt_per_word = n_elts / n_words;
18351 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18353 for (i = 0; i < n_words; ++i)
18355 rtx word = NULL_RTX;
18357 for (j = 0; j < n_elt_per_word; ++j)
18359 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18360 elt = convert_modes (word_mode, inner_mode, elt, true);
18366 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18367 word, 1, OPTAB_LIB_WIDEN);
18368 word = expand_simple_binop (word_mode, IOR, word, elt,
18369 word, 1, OPTAB_LIB_WIDEN);
18377 emit_move_insn (target, gen_lowpart (mode, words[0]));
18378 else if (n_words == 2)
18380 rtx tmp = gen_reg_rtx (mode);
18381 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18382 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18383 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18384 emit_move_insn (target, tmp);
18386 else if (n_words == 4)
18388 rtx tmp = gen_reg_rtx (V4SImode);
18389 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18390 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18391 emit_move_insn (target, gen_lowpart (mode, tmp));
18394 gcc_unreachable ();
18398 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18399 instructions unless MMX_OK is true. */
18402 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18404 enum machine_mode mode = GET_MODE (target);
18405 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18406 int n_elts = GET_MODE_NUNITS (mode);
18407 int n_var = 0, one_var = -1;
18408 bool all_same = true, all_const_zero = true;
18412 for (i = 0; i < n_elts; ++i)
18414 x = XVECEXP (vals, 0, i);
18415 if (!CONSTANT_P (x))
18416 n_var++, one_var = i;
18417 else if (x != CONST0_RTX (inner_mode))
18418 all_const_zero = false;
18419 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18423 /* Constants are best loaded from the constant pool. */
18426 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18430 /* If all values are identical, broadcast the value. */
18432 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18433 XVECEXP (vals, 0, 0)))
18436 /* Values where only one field is non-constant are best loaded from
18437 the pool and overwritten via move later. */
18441 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18442 XVECEXP (vals, 0, one_var),
18446 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18450 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18454 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18456 enum machine_mode mode = GET_MODE (target);
18457 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18458 bool use_vec_merge = false;
18467 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18468 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18470 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18472 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18473 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18483 /* For the two element vectors, we implement a VEC_CONCAT with
18484 the extraction of the other element. */
18486 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18487 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18490 op0 = val, op1 = tmp;
18492 op0 = tmp, op1 = val;
18494 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18495 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18503 use_vec_merge = true;
18507 /* tmp = target = A B C D */
18508 tmp = copy_to_reg (target);
18509 /* target = A A B B */
18510 emit_insn (gen_sse_unpcklps (target, target, target));
18511 /* target = X A B B */
18512 ix86_expand_vector_set (false, target, val, 0);
18513 /* target = A X C D */
18514 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18515 GEN_INT (1), GEN_INT (0),
18516 GEN_INT (2+4), GEN_INT (3+4)));
18520 /* tmp = target = A B C D */
18521 tmp = copy_to_reg (target);
18522 /* tmp = X B C D */
18523 ix86_expand_vector_set (false, tmp, val, 0);
18524 /* target = A B X D */
18525 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18526 GEN_INT (0), GEN_INT (1),
18527 GEN_INT (0+4), GEN_INT (3+4)));
18531 /* tmp = target = A B C D */
18532 tmp = copy_to_reg (target);
18533 /* tmp = X B C D */
18534 ix86_expand_vector_set (false, tmp, val, 0);
18535 /* target = A B X D */
18536 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18537 GEN_INT (0), GEN_INT (1),
18538 GEN_INT (2+4), GEN_INT (0+4)));
18542 gcc_unreachable ();
18547 /* Element 0 handled by vec_merge below. */
18550 use_vec_merge = true;
18556 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18557 store into element 0, then shuffle them back. */
18561 order[0] = GEN_INT (elt);
18562 order[1] = const1_rtx;
18563 order[2] = const2_rtx;
18564 order[3] = GEN_INT (3);
18565 order[elt] = const0_rtx;
18567 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18568 order[1], order[2], order[3]));
18570 ix86_expand_vector_set (false, target, val, 0);
18572 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18573 order[1], order[2], order[3]));
18577 /* For SSE1, we have to reuse the V4SF code. */
18578 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18579 gen_lowpart (SFmode, val), elt);
18584 use_vec_merge = TARGET_SSE2;
18587 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18598 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18599 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18600 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18604 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18606 emit_move_insn (mem, target);
18608 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18609 emit_move_insn (tmp, val);
18611 emit_move_insn (target, mem);
18616 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18618 enum machine_mode mode = GET_MODE (vec);
18619 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18620 bool use_vec_extr = false;
18633 use_vec_extr = true;
18645 tmp = gen_reg_rtx (mode);
18646 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18647 GEN_INT (elt), GEN_INT (elt),
18648 GEN_INT (elt+4), GEN_INT (elt+4)));
18652 tmp = gen_reg_rtx (mode);
18653 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18657 gcc_unreachable ();
18660 use_vec_extr = true;
18675 tmp = gen_reg_rtx (mode);
18676 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18677 GEN_INT (elt), GEN_INT (elt),
18678 GEN_INT (elt), GEN_INT (elt)));
18682 tmp = gen_reg_rtx (mode);
18683 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18687 gcc_unreachable ();
18690 use_vec_extr = true;
18695 /* For SSE1, we have to reuse the V4SF code. */
18696 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18697 gen_lowpart (V4SFmode, vec), elt);
18703 use_vec_extr = TARGET_SSE2;
18706 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18711 /* ??? Could extract the appropriate HImode element and shift. */
18718 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18719 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18721 /* Let the rtl optimizers know about the zero extension performed. */
18722 if (inner_mode == HImode)
18724 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18725 target = gen_lowpart (SImode, target);
18728 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18732 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18734 emit_move_insn (mem, vec);
18736 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18737 emit_move_insn (target, tmp);
18741 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18742 pattern to reduce; DEST is the destination; IN is the input vector. */
18745 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18747 rtx tmp1, tmp2, tmp3;
18749 tmp1 = gen_reg_rtx (V4SFmode);
18750 tmp2 = gen_reg_rtx (V4SFmode);
18751 tmp3 = gen_reg_rtx (V4SFmode);
18753 emit_insn (gen_sse_movhlps (tmp1, in, in));
18754 emit_insn (fn (tmp2, tmp1, in));
18756 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18757 GEN_INT (1), GEN_INT (1),
18758 GEN_INT (1+4), GEN_INT (1+4)));
18759 emit_insn (fn (dest, tmp2, tmp3));
18762 /* Target hook for scalar_mode_supported_p. */
18764 ix86_scalar_mode_supported_p (enum machine_mode mode)
18766 if (DECIMAL_FLOAT_MODE_P (mode))
18769 return default_scalar_mode_supported_p (mode);
18772 /* Implements target hook vector_mode_supported_p. */
18774 ix86_vector_mode_supported_p (enum machine_mode mode)
18776 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18778 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18780 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18782 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18787 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18789 We do this in the new i386 backend to maintain source compatibility
18790 with the old cc0-based compiler. */
18793 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18794 tree inputs ATTRIBUTE_UNUSED,
18797 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18799 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18801 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18806 /* Return true if this goes in small data/bss. */
18809 ix86_in_large_data_p (tree exp)
18811 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18814 /* Functions are never large data. */
18815 if (TREE_CODE (exp) == FUNCTION_DECL)
18818 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18820 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18821 if (strcmp (section, ".ldata") == 0
18822 || strcmp (section, ".lbss") == 0)
18828 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18830 /* If this is an incomplete type with size 0, then we can't put it
18831 in data because it might be too big when completed. */
18832 if (!size || size > ix86_section_threshold)
18839 ix86_encode_section_info (tree decl, rtx rtl, int first)
18841 default_encode_section_info (decl, rtl, first);
18843 if (TREE_CODE (decl) == VAR_DECL
18844 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18845 && ix86_in_large_data_p (decl))
18846 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18849 /* Worker function for REVERSE_CONDITION. */
18852 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18854 return (mode != CCFPmode && mode != CCFPUmode
18855 ? reverse_condition (code)
18856 : reverse_condition_maybe_unordered (code));
18859 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18863 output_387_reg_move (rtx insn, rtx *operands)
18865 if (REG_P (operands[1])
18866 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18868 if (REGNO (operands[0]) == FIRST_STACK_REG)
18869 return output_387_ffreep (operands, 0);
18870 return "fstp\t%y0";
18872 if (STACK_TOP_P (operands[0]))
18873 return "fld%z1\t%y1";
18877 /* Output code to perform a conditional jump to LABEL, if C2 flag in
18878 FP status register is set. */
18881 ix86_emit_fp_unordered_jump (rtx label)
18883 rtx reg = gen_reg_rtx (HImode);
18886 emit_insn (gen_x86_fnstsw_1 (reg));
18888 if (TARGET_USE_SAHF)
18890 emit_insn (gen_x86_sahf_1 (reg));
18892 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18893 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18897 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18899 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18900 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18903 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18904 gen_rtx_LABEL_REF (VOIDmode, label),
18906 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18907 emit_jump_insn (temp);
18910 /* Output code to perform a log1p XFmode calculation. */
18912 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18914 rtx label1 = gen_label_rtx ();
18915 rtx label2 = gen_label_rtx ();
18917 rtx tmp = gen_reg_rtx (XFmode);
18918 rtx tmp2 = gen_reg_rtx (XFmode);
18920 emit_insn (gen_absxf2 (tmp, op1));
18921 emit_insn (gen_cmpxf (tmp,
18922 CONST_DOUBLE_FROM_REAL_VALUE (
18923 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18925 emit_jump_insn (gen_bge (label1));
18927 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18928 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18929 emit_jump (label2);
18931 emit_label (label1);
18932 emit_move_insn (tmp, CONST1_RTX (XFmode));
18933 emit_insn (gen_addxf3 (tmp, op1, tmp));
18934 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18935 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18937 emit_label (label2);
18940 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18943 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18946 /* With Binutils 2.15, the "@unwind" marker must be specified on
18947 every occurrence of the ".eh_frame" section, not just the first
18950 && strcmp (name, ".eh_frame") == 0)
18952 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18953 flags & SECTION_WRITE ? "aw" : "a");
18956 default_elf_asm_named_section (name, flags, decl);
18959 /* Return the mangling of TYPE if it is an extended fundamental type. */
18961 static const char *
18962 ix86_mangle_fundamental_type (tree type)
18964 switch (TYPE_MODE (type))
18967 /* __float128 is "g". */
18970 /* "long double" or __float80 is "e". */
18977 /* For 32-bit code we can save PIC register setup by using
18978 __stack_chk_fail_local hidden function instead of calling
18979 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
18980 register, so it is better to call __stack_chk_fail directly. */
18983 ix86_stack_protect_fail (void)
18985 return TARGET_64BIT
18986 ? default_external_stack_protect_fail ()
18987 : default_hidden_stack_protect_fail ();
18990 /* Select a format to encode pointers in exception handling data. CODE
18991 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18992 true if the symbol may be affected by dynamic relocations.
18994 ??? All x86 object file formats are capable of representing this.
18995 After all, the relocation needed is the same as for the call insn.
18996 Whether or not a particular assembler allows us to enter such, I
18997 guess we'll have to see. */
18999 asm_preferred_eh_data_format (int code, int global)
19003 int type = DW_EH_PE_sdata8;
19005 || ix86_cmodel == CM_SMALL_PIC
19006 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19007 type = DW_EH_PE_sdata4;
19008 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19010 if (ix86_cmodel == CM_SMALL
19011 || (ix86_cmodel == CM_MEDIUM && code))
19012 return DW_EH_PE_udata4;
19013 return DW_EH_PE_absptr;
19016 #include "gt-i386.h"