/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "function.h"
#include "expr.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
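
/* Usage sketch (illustrative, not a quote from the cost code): an SImode
   multiply cost would be looked up as

     ix86_cost->mult_init[MODE_INDEX (SImode)]   -- i.e. mult_init[2]

   with index 4 ("other") covering TImode and any remaining modes.  */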

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
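
/* Worked example of the scaling above: with COSTS_N_INSNS (N) = (N) * 4,
   a two-byte addition costs COSTS_N_BYTES (2) = 4 = COSTS_N_INSNS (1),
   so byte counts and insn counts land on the same scale and the size
   table below stays comparable with the speed tables.  */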

static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),		/* cost of an add instruction */
  COSTS_N_BYTES (3),		/* cost of a lea instruction */
  COSTS_N_BYTES (2),		/* variable shift costs */
  COSTS_N_BYTES (3),		/* constant shift costs */
  {COSTS_N_BYTES (3),		/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),		/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  COSTS_N_BYTES (3),		/* cost of movsx */
  COSTS_N_BYTES (3),		/* cost of movzx */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 2},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {2, 2, 2},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  3,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {3, 3},			/* cost of storing MMX registers
				   in SImode and DImode */
  3,				/* cost of moving SSE register */
  {3, 3, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {3, 3, 3},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_BYTES (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),		/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),		/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),		/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (6),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),		/* HI */
   COSTS_N_INSNS (6),		/* SI */
   COSTS_N_INSNS (6),		/* DI */
   COSTS_N_INSNS (6)},		/* other */
  COSTS_N_INSNS (1),		/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/* HI */
   COSTS_N_INSNS (23),		/* SI */
   COSTS_N_INSNS (23),		/* DI */
   COSTS_N_INSNS (23)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (23),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),		/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (12),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),		/* HI */
   COSTS_N_INSNS (12),		/* SI */
   COSTS_N_INSNS (12),		/* DI */
   COSTS_N_INSNS (12)},		/* other */
  1,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),		/* HI */
   COSTS_N_INSNS (40),		/* SI */
   COSTS_N_INSNS (40),		/* DI */
   COSTS_N_INSNS (40)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),		/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (11),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),		/* HI */
   COSTS_N_INSNS (11),		/* SI */
   COSTS_N_INSNS (11),		/* DI */
   COSTS_N_INSNS (11)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),		/* HI */
   COSTS_N_INSNS (25),		/* SI */
   COSTS_N_INSNS (25),		/* DI */
   COSTS_N_INSNS (25)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),		/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (4),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (4)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),		/* HI */
   COSTS_N_INSNS (17),		/* SI */
   COSTS_N_INSNS (17),		/* DI */
   COSTS_N_INSNS (17)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (3),		/* DI */
   COSTS_N_INSNS (3)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),		/* HI */
   COSTS_N_INSNS (18),		/* SI */
   COSTS_N_INSNS (18),		/* DI */
   COSTS_N_INSNS (18)},		/* other */
  COSTS_N_INSNS (2),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6,				/* MMX or SSE register to integer */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  COSTS_N_INSNS (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (5),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),		/* HI */
   COSTS_N_INSNS (5),		/* SI */
   COSTS_N_INSNS (5),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (3),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (4),		/* constant shift costs */
  {COSTS_N_INSNS (15),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),		/* HI */
   COSTS_N_INSNS (15),		/* SI */
   COSTS_N_INSNS (15),		/* DI */
   COSTS_N_INSNS (15)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),		/* HI */
   COSTS_N_INSNS (56),		/* SI */
   COSTS_N_INSNS (56),		/* DI */
   COSTS_N_INSNS (56)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (5),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),		/* cost of FSQRT instruction.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (10),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),		/* HI */
   COSTS_N_INSNS (10),		/* SI */
   COSTS_N_INSNS (10),		/* DI */
   COSTS_N_INSNS (10)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),		/* HI */
   COSTS_N_INSNS (66),		/* SI */
   COSTS_N_INSNS (66),		/* DI */
   COSTS_N_INSNS (66)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  3,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  6,				/* cost of moving MMX register */
  {12, 12},			/* cost of loading MMX registers
				   in SImode and DImode */
  {12, 12},			/* cost of storing MMX registers
				   in SImode and DImode */
  6,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {12, 12, 12},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  8,				/* MMX or SSE register to integer */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),		/* cost of FSQRT instruction.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regression on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
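
/* Illustrative lookup through the pointer above (a sketch; the real
   consumers are ix86_rtx_costs and friends): after override_options has
   pointed ix86_cost at the table for the selected -mtune CPU, the tuned
   cost of an SImode divide is read as

     ix86_cost->divide[MODE_INDEX (SImode)]  */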

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
#define m_K8  (1<<PROCESSOR_K8)
#define m_ATHLON_K8  (m_K8 | m_ATHLON)
#define m_NOCONA  (1<<PROCESSOR_NOCONA)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
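
/* A sketch of how these masks are consumed (the actual tests live in the
   TARGET_* convenience macros in i386.h): each x86_* tuning flag below is
   a bitmask over processor types, tested against the CPU selected for
   tuning, roughly

     if (x86_use_leave & (1 << ix86_tune))
       ... emit leave instead of mov %ebp, %esp; pop %ebp ...  */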

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/Athlon/K8).  */

/* Leave does not affect Nocona SPEC2000 results negatively, so enabling it
   for Generic64 seems like a good code size tradeoff.  We can't enable it
   for 32bit generic because it does not work well with PPro based chips.  */
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
/* Branch hints were put in P4 based on simulation results.  But after P4
   was released, no performance benefit was observed with branch hints;
   they also increase code size.  As a result, icc never generates
   branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on the Generic32
   compilation setting as well.  However, in the current implementation
   partial register stalls are not eliminated very well - they can be
   introduced via subregs synthesized by combine and can happen in
   caller/callee saving sequences.  Because this option pays back little on
   PPro based chips and conflicts with the partial register dependencies
   used by Athlon/P4 based chips, it is better to leave it off for
   generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   x86_partial_reg_stall, this option might be considered for Generic32 if
   our scheme for avoiding partial stalls were more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
/* In the Generic model we have a conflict between PPro/Pentium4 based
   chips, which treat 128bit SSE registers as single units, and K8 based
   chips, which split SSE registers into two 64bit halves.
   x86_sse_partial_reg_dependency promotes all store destinations to 128bit
   to allow register renaming on 128bit SSE units, but this usually results
   in one extra microop on 64bit SSE units.  Experimental results show that
   disabling this option on P4 brings over 20% SPECfp regression, while
   enabling it on K8 brings roughly 2.4% regression that can be partly
   masked by careful scheduling of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);

/* ??? Allowing interunit moves makes it all too easy for the compiler to
   put integer data in xmm registers, which results in pretty abysmal
   code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
/* Some CPU cores are not able to predict more than 4 branch instructions
   in the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for Pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Compare and exchange 16 bytes was added for Nocona.  */
const int x86_cmpxchg16b = m_NOCONA;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
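
/* Example of reading the map: hard register 1 is %edx in GCC's internal
   numbering (ax, dx, cx, bx, si, di, bp, sp, ...), so REGNO_REG_CLASS (1)
   yields DREG, the smallest class containing it.  */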

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
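
/* For illustration: per the x86-64 psABI, the first two integer arguments
   of a call go in %rdi (gcc regno 5) and %rsi (gcc regno 4), and an
   integer return value comes back in %rax (gcc regno 0), with %rdx
   (gcc regno 1) holding the second half of a 128bit result.  */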

/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
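
/* Reading the map above together with the comment: %ecx is gcc hard
   register 2 but SVR4 Dwarf register 1 (svr4_dbx_register_map[2] == 1),
   and %st(0), gcc regno 8, maps to Dwarf register 11.  */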

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
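
/* Worked example, assuming the usual 64bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8: this expands to
   6*8 + 8*16 = 176 bytes, the register save area size the x86-64 psABI
   prescribes for varargs functions.  */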

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]				      <- ARG_POINTER
   saved pc
   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]
   [padding1]
   [va_arg registers]	> to_allocate	      <- FRAME_POINTER
   [frame]
   [padding2]  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependent (rtx, rtx, enum attr_type);
static int ix86_agi_dependent (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_scalar_mode_supported_p (enum machine_mode);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
     ATTRIBUTE_UNUSED;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, except that gcc
   will use SF or DFmode moves instead of DImode to avoid reformatting
   penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4
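
/* Classification example under the psABI rules this enum mirrors: for
   struct { double d; int i; }, the first eightbyte (d) classifies as
   X86_64_SSEDF_CLASS and the second (i) as X86_64_INTEGERSI_CLASS, so
   the struct is passed in one SSE and one integer register.  */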

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
static section *x86_64_elf_select_section (tree decl, int reloc,
					   unsigned HOST_WIDE_INT align)
					   ATTRIBUTE_UNUSED;

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_64BIT_DEFAULT		\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Implement TARGET_HANDLE_OPTION.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
    case OPT_m3dnow:
      if (!value)
	{
	  target_flags &= ~MASK_3DNOW_A;
	  target_flags_explicit |= MASK_3DNOW_A;
	}
      return true;

    case OPT_mmmx:
      if (!value)
	{
	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
	}
      return true;

    case OPT_msse:
      if (!value)
	{
	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
	}
      return true;

    case OPT_msse2:
      if (!value)
	{
	  target_flags &= ~MASK_SSE3;
	  target_flags_explicit |= MASK_SSE3;
	}
      return true;

    default:
      return true;
    }
}

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
      {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
1484 {"i386", PROCESSOR_I386, 0},
1485 {"i486", PROCESSOR_I486, 0},
1486 {"i586", PROCESSOR_PENTIUM, 0},
1487 {"pentium", PROCESSOR_PENTIUM, 0},
1488 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1489 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1490 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1491 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1492 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1493 {"i686", PROCESSOR_PENTIUMPRO, 0},
1494 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1495 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1496 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1497 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1498 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1499 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1500 | PTA_MMX | PTA_PREFETCH_SSE},
1501 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1502 | PTA_MMX | PTA_PREFETCH_SSE},
1503 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1504 | PTA_MMX | PTA_PREFETCH_SSE},
1505 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1506 | PTA_MMX | PTA_PREFETCH_SSE},
1507 {"k6", PROCESSOR_K6, PTA_MMX},
1508 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1509 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1510 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1512 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1513 | PTA_3DNOW | PTA_3DNOW_A},
1514 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1515 | PTA_3DNOW_A | PTA_SSE},
1516 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1517 | PTA_3DNOW_A | PTA_SSE},
1518 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1519 | PTA_3DNOW_A | PTA_SSE},
1520 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1521 | PTA_SSE | PTA_SSE2 },
1522 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1523 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1524 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1525 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1526 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1527 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1528 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1529 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1530 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1531 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1534 int const pta_size = ARRAY_SIZE (processor_alias_table);
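/* Illustrative sketch (not part of GCC): how one row of
   processor_alias_table is consumed.  Each PTA_* value is a distinct
   bit, so a row like "k8" ORs together every ISA feature the chip has,
   and override_options enables the corresponding MASK_* bit unless the
   user set it explicitly.  PTA_MMX_X and MASK_MMX_X below are
   hypothetical stand-ins for the real bits.  */
#if 0
#define PTA_MMX_X  0x1
#define MASK_MMX_X 0x1
static void
apply_alias_row (unsigned pta, int *flags, int flags_explicit)
{
  if ((pta & PTA_MMX_X) && !(flags_explicit & MASK_MMX_X))
    *flags |= MASK_MMX_X;  /* feature present, user expressed no opinion */
}
#endif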
1536 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1537 SUBTARGET_OVERRIDE_OPTIONS;
1540 /* -fPIC is the default for x86_64. */
1541 if (TARGET_MACHO && TARGET_64BIT)
1544 /* Set the default values for switches whose default depends on TARGET_64BIT
1545 in case they weren't overridden by command line options. */
1548 /* Mach-O doesn't support omitting the frame pointer for now. */
1549 if (flag_omit_frame_pointer == 2)
1550 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1551 if (flag_asynchronous_unwind_tables == 2)
1552 flag_asynchronous_unwind_tables = 1;
1553 if (flag_pcc_struct_return == 2)
1554 flag_pcc_struct_return = 0;
1558 if (flag_omit_frame_pointer == 2)
1559 flag_omit_frame_pointer = 0;
1560 if (flag_asynchronous_unwind_tables == 2)
1561 flag_asynchronous_unwind_tables = 0;
1562 if (flag_pcc_struct_return == 2)
1563 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1566 /* Need to check -mtune=generic first. */
1567 if (ix86_tune_string)
1569 if (!strcmp (ix86_tune_string, "generic")
1570 || !strcmp (ix86_tune_string, "i686")
1571 /* As special support for cross compilers we read -mtune=native
1572 as -mtune=generic. With native compilers we won't see the
1573 -mtune=native, as it was changed by the driver. */
1574 || !strcmp (ix86_tune_string, "native"))
1577 ix86_tune_string = "generic64";
1579 ix86_tune_string = "generic32";
1581 else if (!strncmp (ix86_tune_string, "generic", 7))
1582 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1586 if (ix86_arch_string)
1587 ix86_tune_string = ix86_arch_string;
1588 if (!ix86_tune_string)
1590 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1591 ix86_tune_defaulted = 1;
1594 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1595 need to use a sensible tune option. */
1596 if (!strcmp (ix86_tune_string, "generic")
1597 || !strcmp (ix86_tune_string, "x86-64")
1598 || !strcmp (ix86_tune_string, "i686"))
1601 ix86_tune_string = "generic64";
1603 ix86_tune_string = "generic32";
1606 if (!strcmp (ix86_tune_string, "x86-64"))
1607 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1608 "-mtune=generic instead as appropriate.");
1610 if (!ix86_arch_string)
1611 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1612 if (!strcmp (ix86_arch_string, "generic"))
1613 error ("generic CPU can be used only for -mtune= switch");
1614 if (!strncmp (ix86_arch_string, "generic", 7))
1615 error ("bad value (%s) for -march= switch", ix86_arch_string);
1617 if (ix86_cmodel_string != 0)
1619 if (!strcmp (ix86_cmodel_string, "small"))
1620 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1621 else if (!strcmp (ix86_cmodel_string, "medium"))
1622 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1624 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1625 else if (!strcmp (ix86_cmodel_string, "32"))
1626 ix86_cmodel = CM_32;
1627 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1628 ix86_cmodel = CM_KERNEL;
1629 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1630 ix86_cmodel = CM_LARGE;
1632 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1636 ix86_cmodel = CM_32;
1638 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1640 if (ix86_asm_string != 0)
1643 && !strcmp (ix86_asm_string, "intel"))
1644 ix86_asm_dialect = ASM_INTEL;
1645 else if (!strcmp (ix86_asm_string, "att"))
1646 ix86_asm_dialect = ASM_ATT;
1648 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1650 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1651 error ("code model %qs not supported in the %s-bit mode",
1652 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1653 if (ix86_cmodel == CM_LARGE)
1654 sorry ("code model %<large%> not supported yet");
1655 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1656 sorry ("%i-bit mode not compiled in",
1657 (target_flags & MASK_64BIT) ? 64 : 32);
1659 for (i = 0; i < pta_size; i++)
1660 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1662 ix86_arch = processor_alias_table[i].processor;
1663 /* Default cpu tuning to the architecture. */
1664 ix86_tune = ix86_arch;
1665 if (processor_alias_table[i].flags & PTA_MMX
1666 && !(target_flags_explicit & MASK_MMX))
1667 target_flags |= MASK_MMX;
1668 if (processor_alias_table[i].flags & PTA_3DNOW
1669 && !(target_flags_explicit & MASK_3DNOW))
1670 target_flags |= MASK_3DNOW;
1671 if (processor_alias_table[i].flags & PTA_3DNOW_A
1672 && !(target_flags_explicit & MASK_3DNOW_A))
1673 target_flags |= MASK_3DNOW_A;
1674 if (processor_alias_table[i].flags & PTA_SSE
1675 && !(target_flags_explicit & MASK_SSE))
1676 target_flags |= MASK_SSE;
1677 if (processor_alias_table[i].flags & PTA_SSE2
1678 && !(target_flags_explicit & MASK_SSE2))
1679 target_flags |= MASK_SSE2;
1680 if (processor_alias_table[i].flags & PTA_SSE3
1681 && !(target_flags_explicit & MASK_SSE3))
1682 target_flags |= MASK_SSE3;
1683 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1684 x86_prefetch_sse = true;
1685 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1686 error ("CPU you selected does not support x86-64 "
1692 error ("bad value (%s) for -march= switch", ix86_arch_string);
1694 for (i = 0; i < pta_size; i++)
1695 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1697 ix86_tune = processor_alias_table[i].processor;
1698 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1700 if (ix86_tune_defaulted)
1702 ix86_tune_string = "x86-64";
1703 for (i = 0; i < pta_size; i++)
1704 if (! strcmp (ix86_tune_string,
1705 processor_alias_table[i].name))
1707 ix86_tune = processor_alias_table[i].processor;
1710 error ("CPU you selected does not support x86-64 "
1713 /* Intel CPUs have always interpreted SSE prefetch instructions as
1714 NOPs, so we can enable SSE prefetch instructions even when
1715 -mtune (rather than -march) points us to a processor that has them.
1716 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1717 higher processors. */
1718 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1719 x86_prefetch_sse = true;
1723 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1726 ix86_cost = &size_cost;
1728 ix86_cost = processor_target_table[ix86_tune].cost;
1729 target_flags |= processor_target_table[ix86_tune].target_enable;
1730 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1732 /* Arrange to set up i386_stack_locals for all functions. */
1733 init_machine_status = ix86_init_machine_status;
1735 /* Validate -mregparm= value. */
1736 if (ix86_regparm_string)
1738 i = atoi (ix86_regparm_string);
1739 if (i < 0 || i > REGPARM_MAX)
1740 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1746 ix86_regparm = REGPARM_MAX;
1748 /* If the user has provided any of the -malign-* options,
1749 warn and use that value only if -falign-* is not set.
1750 Remove this code in GCC 3.2 or later. */
1751 if (ix86_align_loops_string)
1753 warning (0, "-malign-loops is obsolete, use -falign-loops");
1754 if (align_loops == 0)
1756 i = atoi (ix86_align_loops_string);
1757 if (i < 0 || i > MAX_CODE_ALIGN)
1758 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1760 align_loops = 1 << i;
1764 if (ix86_align_jumps_string)
1766 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1767 if (align_jumps == 0)
1769 i = atoi (ix86_align_jumps_string);
1770 if (i < 0 || i > MAX_CODE_ALIGN)
1771 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1773 align_jumps = 1 << i;
1777 if (ix86_align_funcs_string)
1779 warning (0, "-malign-functions is obsolete, use -falign-functions");
1780 if (align_functions == 0)
1782 i = atoi (ix86_align_funcs_string);
1783 if (i < 0 || i > MAX_CODE_ALIGN)
1784 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1786 align_functions = 1 << i;
1790 /* Default align_* from the processor table. */
1791 if (align_loops == 0)
1793 align_loops = processor_target_table[ix86_tune].align_loop;
1794 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1796 if (align_jumps == 0)
1798 align_jumps = processor_target_table[ix86_tune].align_jump;
1799 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1801 if (align_functions == 0)
1803 align_functions = processor_target_table[ix86_tune].align_func;
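/* Illustrative sketch (not part of GCC): the -malign-*=N options above
   encode alignment as a power of two, so N is the log2 of the byte
   alignment; the processor table instead stores byte alignments
   directly (e.g. 16 for most rows above).  */
#if 0
static int
align_bytes_from_option (int n)  /* expects 0 <= n <= MAX_CODE_ALIGN */
{
  return 1 << n;                 /* -malign-loops=4  ->  16-byte alignment */
}
#endif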
1806 /* Validate -mbranch-cost= value, or provide default. */
1807 ix86_branch_cost = ix86_cost->branch_cost;
1808 if (ix86_branch_cost_string)
1810 i = atoi (ix86_branch_cost_string);
1812 error ("-mbranch-cost=%d is not between 0 and 5", i);
1814 ix86_branch_cost = i;
1816 if (ix86_section_threshold_string)
1818 i = atoi (ix86_section_threshold_string);
1820 error ("-mlarge-data-threshold=%d is negative", i);
1822 ix86_section_threshold = i;
1825 if (ix86_tls_dialect_string)
1827 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1828 ix86_tls_dialect = TLS_DIALECT_GNU;
1829 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1830 ix86_tls_dialect = TLS_DIALECT_GNU2;
1831 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1832 ix86_tls_dialect = TLS_DIALECT_SUN;
1834 error ("bad value (%s) for -mtls-dialect= switch",
1835 ix86_tls_dialect_string);
1838 /* Keep nonleaf frame pointers. */
1839 if (flag_omit_frame_pointer)
1840 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1841 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1842 flag_omit_frame_pointer = 1;
1844 /* If we're doing fast math, we don't care about comparison order
1845 wrt NaNs. This lets us use a shorter comparison sequence. */
1846 if (flag_unsafe_math_optimizations)
1847 target_flags &= ~MASK_IEEE_FP;
1849 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1850 since the insns won't need emulation. */
1851 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1852 target_flags &= ~MASK_NO_FANCY_MATH_387;
1854 /* Likewise, if the target doesn't have a 387, or we've specified
1855 software floating point, don't use 387 inline intrinsics. */
1857 target_flags |= MASK_NO_FANCY_MATH_387;
1859 /* Turn on SSE2 builtins for -msse3. */
1861 target_flags |= MASK_SSE2;
1863 /* Turn on SSE builtins for -msse2. */
1865 target_flags |= MASK_SSE;
1867 /* Turn on MMX builtins for -msse. */
1870 target_flags |= MASK_MMX & ~target_flags_explicit;
1871 x86_prefetch_sse = true;
1874 /* Turn on MMX builtins for 3Dnow. */
1876 target_flags |= MASK_MMX;
1880 if (TARGET_ALIGN_DOUBLE)
1881 error ("-malign-double makes no sense in the 64-bit mode");
1883 error ("-mrtd calling convention not supported in the 64-bit mode");
1885 /* Enable by default the SSE and MMX builtins. Do allow the user to
1886 explicitly disable any of these. In particular, disabling SSE and
1887 MMX for kernel code is extremely useful. */
1889 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1890 & ~target_flags_explicit);
1894 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1895 when the programmer takes care to keep the stack from being destroyed. */
1896 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1897 target_flags |= MASK_NO_RED_ZONE;
1900 /* Validate -mpreferred-stack-boundary= value, or provide default.
1901 The default of 128 bits is for Pentium III's SSE __m128. We can't
1902 change it because of optimize_size. Otherwise, we can't mix object
1903 files compiled with -Os and -On. */
1904 ix86_preferred_stack_boundary = 128;
1905 if (ix86_preferred_stack_boundary_string)
1907 i = atoi (ix86_preferred_stack_boundary_string);
1908 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1909 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1910 TARGET_64BIT ? 4 : 2);
1912 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
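/* Illustrative sketch (not part of GCC): the same log2 encoding as the
   -malign-* options.  With BITS_PER_UNIT == 8 on this target,
   -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128 bits, the
   16-byte alignment that the SSE __m128 default above reflects.  */
#if 0
static int
boundary_bits (int i)   /* expects (TARGET_64BIT ? 4 : 2) <= i <= 12 */
{
  return (1 << i) * 8;
}
#endif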
1915 /* Accept -msseregparm only if at least SSE support is enabled. */
1916 if (TARGET_SSEREGPARM
1918 error ("-msseregparm used without SSE enabled");
1920 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1922 if (ix86_fpmath_string != 0)
1924 if (! strcmp (ix86_fpmath_string, "387"))
1925 ix86_fpmath = FPMATH_387;
1926 else if (! strcmp (ix86_fpmath_string, "sse"))
1930 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1931 ix86_fpmath = FPMATH_387;
1934 ix86_fpmath = FPMATH_SSE;
1936 else if (! strcmp (ix86_fpmath_string, "387,sse")
1937 || ! strcmp (ix86_fpmath_string, "sse,387"))
1941 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1942 ix86_fpmath = FPMATH_387;
1944 else if (!TARGET_80387)
1946 warning (0, "387 instruction set disabled, using SSE arithmetics");
1947 ix86_fpmath = FPMATH_SSE;
1950 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1953 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1956 /* If the i387 is disabled, then do not return values in it. */
1958 target_flags &= ~MASK_FLOAT_RETURNS;
1960 if ((x86_accumulate_outgoing_args & TUNEMASK)
1961 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1963 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1965 /* ??? Unwind info is not correct around the CFG unless either a frame
1966 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1967 unwind info generation to be aware of the CFG and propagating states
1969 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1970 || flag_exceptions || flag_non_call_exceptions)
1971 && flag_omit_frame_pointer
1972 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1974 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1975 warning (0, "unwind tables currently require either a frame pointer "
1976 "or -maccumulate-outgoing-args for correctness");
1977 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1980 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1983 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1984 p = strchr (internal_label_prefix, 'X');
1985 internal_label_prefix_len = p - internal_label_prefix;
1989 /* When the scheduling description is not available, disable the scheduler pass
1990 so that it won't slow down the compilation and make x87 code slower. */
1991 if (!TARGET_SCHEDULE)
1992 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1995 /* Switch to the appropriate section for output of DECL.
1996 DECL is either a `VAR_DECL' node or a constant of some sort.
1997 RELOC indicates whether forming the initial value of DECL requires
1998 link-time relocations. */
2001 x86_64_elf_select_section (tree decl, int reloc,
2002 unsigned HOST_WIDE_INT align)
2004 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2005 && ix86_in_large_data_p (decl))
2007 const char *sname = NULL;
2008 unsigned int flags = SECTION_WRITE;
2009 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2014 case SECCAT_DATA_REL:
2015 sname = ".ldata.rel";
2017 case SECCAT_DATA_REL_LOCAL:
2018 sname = ".ldata.rel.local";
2020 case SECCAT_DATA_REL_RO:
2021 sname = ".ldata.rel.ro";
2023 case SECCAT_DATA_REL_RO_LOCAL:
2024 sname = ".ldata.rel.ro.local";
2028 flags |= SECTION_BSS;
2031 case SECCAT_RODATA_MERGE_STR:
2032 case SECCAT_RODATA_MERGE_STR_INIT:
2033 case SECCAT_RODATA_MERGE_CONST:
2037 case SECCAT_SRODATA:
2044 /* We don't split these for the medium model. Place them into
2045 default sections and hope for the best. */
2050 /* We might get called with string constants, but get_named_section
2051 doesn't like them as they are not DECLs. Also, we need to set
2052 flags in that case. */
2054 return get_section (sname, flags, NULL);
2055 return get_named_section (decl, sname, reloc);
2058 return default_elf_select_section (decl, reloc, align);
2061 /* Build up a unique section name, expressed as a
2062 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2063 RELOC indicates whether the initial value of DECL requires
2064 link-time relocations. */
2067 x86_64_elf_unique_section (tree decl, int reloc)
2069 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2070 && ix86_in_large_data_p (decl))
2072 const char *prefix = NULL;
2073 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2074 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2076 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2079 case SECCAT_DATA_REL:
2080 case SECCAT_DATA_REL_LOCAL:
2081 case SECCAT_DATA_REL_RO:
2082 case SECCAT_DATA_REL_RO_LOCAL:
2083 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2086 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2089 case SECCAT_RODATA_MERGE_STR:
2090 case SECCAT_RODATA_MERGE_STR_INIT:
2091 case SECCAT_RODATA_MERGE_CONST:
2092 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2094 case SECCAT_SRODATA:
2101 /* We don't split these for the medium model. Place them into
2102 default sections and hope for the best. */
2110 plen = strlen (prefix);
2112 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2113 name = targetm.strip_name_encoding (name);
2114 nlen = strlen (name);
2116 string = alloca (nlen + plen + 1);
2117 memcpy (string, prefix, plen);
2118 memcpy (string + plen, name, nlen + 1);
2120 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2124 default_unique_section (decl, reloc);
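/* Illustrative sketch (not part of GCC): the prefix+name splice done by
   x86_64_elf_unique_section above.  A writable large-model variable
   `foo' gets the section ".ldata.foo" (or ".gnu.linkonce.ld.foo" when
   it must be one-only without COMDAT groups).  */
#if 0
#include <string.h>
static void
splice_section_name (char *buf, const char *prefix, const char *name)
{
  size_t plen = strlen (prefix);
  memcpy (buf, prefix, plen);
  memcpy (buf + plen, name, strlen (name) + 1);  /* copies the trailing NUL */
}
#endif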
2127 #ifdef COMMON_ASM_OP
2128 /* This says how to output assembler code to declare an
2129 uninitialized external linkage data object.
2131 For medium model x86-64 we need to use the .largecomm directive for
2134 x86_elf_aligned_common (FILE *file,
2135 const char *name, unsigned HOST_WIDE_INT size,
2138 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2139 && size > (unsigned int)ix86_section_threshold)
2140 fprintf (file, ".largecomm\t");
2142 fprintf (file, "%s", COMMON_ASM_OP);
2143 assemble_name (file, name);
2144 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2145 size, align / BITS_PER_UNIT);
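/* For example (output sketched, not verbatim): a medium-model object
   `big' of 131072 bytes with 256-bit alignment, given a threshold of
   65536, would be announced as

       .largecomm  big,131072,32

   since the alignment argument is printed in bytes (align /
   BITS_PER_UNIT); a smaller object falls back to the COMMON_ASM_OP
   spelling, typically ".comm".  */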
2148 /* Utility function for targets to use in implementing
2149 ASM_OUTPUT_ALIGNED_BSS. */
2152 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2153 const char *name, unsigned HOST_WIDE_INT size,
2156 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2157 && size > (unsigned int)ix86_section_threshold)
2158 switch_to_section (get_named_section (decl, ".lbss", 0));
2160 switch_to_section (bss_section);
2161 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2162 #ifdef ASM_DECLARE_OBJECT_NAME
2163 last_assemble_variable_decl = decl;
2164 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2166 /* The standard thing is to just output a label for the object. */
2167 ASM_OUTPUT_LABEL (file, name);
2168 #endif /* ASM_DECLARE_OBJECT_NAME */
2169 ASM_OUTPUT_SKIP (file, size ? size : 1);
2174 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2176 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2177 make the register shortage problem even worse. */
2178 #ifdef INSN_SCHEDULING
2180 flag_schedule_insns = 0;
2184 /* The Darwin libraries never set errno, so we might as well
2185 avoid calling them when that's the only reason we would. */
2186 flag_errno_math = 0;
2188 /* The default values of these switches depend on TARGET_64BIT,
2189 which is not known at this moment. Mark these values with 2 and
2190 let the user override them. In case there is no command line option
2191 specifying them, we will set the defaults in override_options.
2193 flag_omit_frame_pointer = 2;
2194 flag_pcc_struct_return = 2;
2195 flag_asynchronous_unwind_tables = 2;
2196 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2197 SUBTARGET_OPTIMIZATION_OPTIONS;
2201 /* Table of valid machine attributes. */
2202 const struct attribute_spec ix86_attribute_table[] =
2204 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2205 /* Stdcall attribute says callee is responsible for popping arguments
2206 if they are not variable. */
2207 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2208 /* Fastcall attribute says callee is responsible for popping arguments
2209 if they are not variable. */
2210 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2211 /* Cdecl attribute says the callee is a normal C declaration. */
2212 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2213 /* Regparm attribute specifies how many integer arguments are to be
2214 passed in registers. */
2215 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2216 /* Sseregparm attribute says we are using x86_64 calling conventions
2217 for FP arguments. */
2218 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2219 /* force_align_arg_pointer says this function realigns the stack at entry. */
2220 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2221 false, true, true, ix86_handle_cconv_attribute },
2222 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2223 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2224 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2225 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2227 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2228 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2229 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2230 SUBTARGET_ATTRIBUTE_TABLE,
2232 { NULL, 0, 0, false, false, false, NULL }
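/* Illustrative sketch (not part of GCC): source-level spellings that the
   calling-convention entries above accept on 32-bit x86.  */
#if 0
int __attribute__ ((stdcall))     f1 (int a, int b);        /* callee pops args     */
int __attribute__ ((fastcall))    f2 (int a, int b);        /* a in %ecx, b in %edx */
int __attribute__ ((regparm (3))) f3 (int a, int b, int c); /* %eax, %edx, %ecx     */
int __attribute__ ((cdecl))       f4 (int a, int b);        /* caller pops args     */
#endif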
2235 /* Decide whether we can make a sibling call to a function. DECL is the
2236 declaration of the function being targeted by the call and EXP is the
2237 CALL_EXPR representing the call. */
2240 ix86_function_ok_for_sibcall (tree decl, tree exp)
2245 /* If we are generating position-independent code, we cannot sibcall
2246 optimize any indirect call, or a direct call to a global function,
2247 as the PLT requires %ebx be live. */
2248 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2255 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2256 if (POINTER_TYPE_P (func))
2257 func = TREE_TYPE (func);
2260 /* Check that the return value locations are the same. For instance,
2261 if we are returning floats on the 80387 register stack, we cannot
2262 make a sibcall from a function that doesn't return a float to a
2263 function that does or, conversely, from a function that does return
2264 a float to a function that doesn't; the necessary stack adjustment
2265 would not be executed. This is also the place we notice
2266 differences in the return value ABI. Note that it is ok for one
2267 of the functions to have void return type as long as the return
2268 value of the other is passed in a register. */
2269 a = ix86_function_value (TREE_TYPE (exp), func, false);
2270 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2272 if (STACK_REG_P (a) || STACK_REG_P (b))
2274 if (!rtx_equal_p (a, b))
2277 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2279 else if (!rtx_equal_p (a, b))
2282 /* If this call is indirect, we'll need to be able to use a call-clobbered
2283 register for the address of the target function. Make sure that all
2284 such registers are not used for passing parameters. */
2285 if (!decl && !TARGET_64BIT)
2289 /* We're looking at the CALL_EXPR, we need the type of the function. */
2290 type = TREE_OPERAND (exp, 0); /* pointer expression */
2291 type = TREE_TYPE (type); /* pointer type */
2292 type = TREE_TYPE (type); /* function type */
2294 if (ix86_function_regparm (type, NULL) >= 3)
2296 /* ??? Need to count the actual number of registers to be used,
2297 not the possible number of registers. Fix later. */
2302 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2303 /* Dllimport'd functions are also called indirectly. */
2304 if (decl && DECL_DLLIMPORT_P (decl)
2305 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2309 /* If we force-aligned the stack, then sibcalling would unalign the
2310 stack, which may break the called function. */
2311 if (cfun->machine->force_align_arg_pointer)
2314 /* Otherwise okay. That also includes certain types of indirect calls. */
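/* Illustrative sketch (not part of GCC): the kind of call the predicate
   above can approve.  A direct call in tail position with matching
   integer return conventions may be compiled as "jmp g" rather than
   "call g" followed by a return.  */
#if 0
static int g (int x) { return x + 1; }
static int
f (int x)
{
  return g (x);   /* sibling-call candidate */
}
#endif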
2318 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2319 calling convention attributes;
2320 arguments as in struct attribute_spec.handler. */
2323 ix86_handle_cconv_attribute (tree *node, tree name,
2325 int flags ATTRIBUTE_UNUSED,
2328 if (TREE_CODE (*node) != FUNCTION_TYPE
2329 && TREE_CODE (*node) != METHOD_TYPE
2330 && TREE_CODE (*node) != FIELD_DECL
2331 && TREE_CODE (*node) != TYPE_DECL)
2333 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2334 IDENTIFIER_POINTER (name));
2335 *no_add_attrs = true;
2339 /* Can combine regparm with all attributes but fastcall. */
2340 if (is_attribute_p ("regparm", name))
2344 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2346 error ("fastcall and regparm attributes are not compatible");
2349 cst = TREE_VALUE (args);
2350 if (TREE_CODE (cst) != INTEGER_CST)
2352 warning (OPT_Wattributes,
2353 "%qs attribute requires an integer constant argument",
2354 IDENTIFIER_POINTER (name));
2355 *no_add_attrs = true;
2357 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2359 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2360 IDENTIFIER_POINTER (name), REGPARM_MAX);
2361 *no_add_attrs = true;
2365 && lookup_attribute (ix86_force_align_arg_pointer_string,
2366 TYPE_ATTRIBUTES (*node))
2367 && compare_tree_int (cst, REGPARM_MAX-1))
2369 error ("%s functions limited to %d register parameters",
2370 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2378 warning (OPT_Wattributes, "%qs attribute ignored",
2379 IDENTIFIER_POINTER (name));
2380 *no_add_attrs = true;
2384 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2385 if (is_attribute_p ("fastcall", name))
2387 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2389 error ("fastcall and cdecl attributes are not compatible");
2391 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2393 error ("fastcall and stdcall attributes are not compatible");
2395 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2397 error ("fastcall and regparm attributes are not compatible");
2401 /* Can combine stdcall with fastcall (redundant), regparm and
2403 else if (is_attribute_p ("stdcall", name))
2405 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2407 error ("stdcall and cdecl attributes are not compatible");
2409 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2411 error ("stdcall and fastcall attributes are not compatible");
2415 /* Can combine cdecl with regparm and sseregparm. */
2416 else if (is_attribute_p ("cdecl", name))
2418 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2420 error ("stdcall and cdecl attributes are not compatible");
2422 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2424 error ("fastcall and cdecl attributes are not compatible");
2428 /* Can combine sseregparm with all attributes. */
2433 /* Return 0 if the attributes for two types are incompatible, 1 if they
2434 are compatible, and 2 if they are nearly compatible (which causes a
2435 warning to be generated). */
2438 ix86_comp_type_attributes (tree type1, tree type2)
2440 /* Check for mismatch of non-default calling convention. */
2441 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2443 if (TREE_CODE (type1) != FUNCTION_TYPE)
2446 /* Check for mismatched fastcall/regparm types. */
2447 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2448 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2449 || (ix86_function_regparm (type1, NULL)
2450 != ix86_function_regparm (type2, NULL)))
2453 /* Check for mismatched sseregparm types. */
2454 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2455 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2458 /* Check for mismatched return types (cdecl vs stdcall). */
2459 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2460 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2466 /* Return the regparm value for a function with the indicated TYPE and DECL.
2467 DECL may be NULL when calling the function indirectly
2468 or considering a libcall. */
2471 ix86_function_regparm (tree type, tree decl)
2474 int regparm = ix86_regparm;
2475 bool user_convention = false;
2479 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2482 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2483 user_convention = true;
2486 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2489 user_convention = true;
2492 /* Use register calling convention for local functions when possible. */
2493 if (!TARGET_64BIT && !user_convention && decl
2494 && flag_unit_at_a_time && !profile_flag)
2496 struct cgraph_local_info *i = cgraph_local_info (decl);
2499 int local_regparm, globals = 0, regno;
2501 /* Make sure no regparm register is taken by a global register
2503 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2504 if (global_regs[local_regparm])
2506 /* We can't use regparm(3) for nested functions, as these pass the
2507 static chain pointer in the third argument. */
2508 if (local_regparm == 3
2509 && decl_function_context (decl)
2510 && !DECL_NO_STATIC_CHAIN (decl))
2512 /* If the function realigns its stack pointer, the
2513 prologue will clobber %ecx. If we've already
2514 generated code for the callee, the callee
2515 DECL_STRUCT_FUNCTION is gone, so we fall back to
2516 scanning the attributes for the self-realigning
2518 if ((DECL_STRUCT_FUNCTION (decl)
2519 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2520 || (!DECL_STRUCT_FUNCTION (decl)
2521 && lookup_attribute (ix86_force_align_arg_pointer_string,
2522 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2524 /* Each global register variable increases register pressure,
2525 so the more global register variables there are, the less useful the
2526 regparm optimization becomes, unless requested by the user explicitly. */
2527 for (regno = 0; regno < 6; regno++)
2528 if (global_regs[regno])
2531 = globals < local_regparm ? local_regparm - globals : 0;
2533 if (local_regparm > regparm)
2534 regparm = local_regparm;
2541 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2542 in SSE registers for a function with the indicated TYPE and DECL.
2543 DECL may be NULL when calling the function indirectly
2544 or considering a libcall. Otherwise return 0. */
2547 ix86_function_sseregparm (tree type, tree decl)
2549 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2550 by the sseregparm attribute. */
2551 if (TARGET_SSEREGPARM
2553 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2558 error ("Calling %qD with attribute sseregparm without "
2559 "SSE/SSE2 enabled", decl);
2561 error ("Calling %qT with attribute sseregparm without "
2562 "SSE/SSE2 enabled", type);
2569 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2570 in SSE registers even for 32-bit mode and not just 3, but up to
2571 8 SSE arguments in registers. */
2572 if (!TARGET_64BIT && decl
2573 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2575 struct cgraph_local_info *i = cgraph_local_info (decl);
2577 return TARGET_SSE2 ? 2 : 1;
2583 /* Return true if EAX is live at the start of the function. Used by
2584 ix86_expand_prologue to determine if we need special help before
2585 calling allocate_stack_worker. */
2588 ix86_eax_live_at_start_p (void)
2590 /* Cheat. Don't bother working forward from ix86_function_regparm
2591 to the function type to whether an actual argument is located in
2592 eax. Instead just look at cfg info, which is still close enough
2593 to correct at this point. This gives false positives for broken
2594 functions that might use uninitialized data that happens to be
2595 allocated in eax, but who cares? */
2596 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2599 /* Value is the number of bytes of arguments automatically
2600 popped when returning from a subroutine call.
2601 FUNDECL is the declaration node of the function (as a tree),
2602 FUNTYPE is the data type of the function (as a tree),
2603 or for a library call it is an identifier node for the subroutine name.
2604 SIZE is the number of bytes of arguments passed on the stack.
2606 On the 80386, the RTD insn may be used to pop them if the number
2607 of args is fixed, but if the number is variable then the caller
2608 must pop them all. RTD can't be used for library calls now
2609 because the library is compiled with the Unix compiler.
2610 Use of RTD is a selectable option, since it is incompatible with
2611 standard Unix calling sequences. If the option is not selected,
2612 the caller must always pop the args.
2614 The attribute stdcall is equivalent to RTD on a per module basis. */
2617 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2619 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2621 /* Cdecl functions override -mrtd, and never pop the stack. */
2622 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2624 /* Stdcall and fastcall functions will pop the stack if not
2626 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2627 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2631 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2632 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2633 == void_type_node)))
2637 /* Lose any fake structure return argument if it is passed on the stack. */
2638 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2640 && !KEEP_AGGREGATE_RETURN_POINTER)
2642 int nregs = ix86_function_regparm (funtype, fundecl);
2645 return GET_MODE_SIZE (Pmode);
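/* For example, a stdcall function taking two ints pops its own 8 bytes
   of arguments with "ret $8", so this function reports 8 for it; a
   plain cdecl function returns with "ret" and reports 0, leaving the
   caller to adjust the stack.  */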
2651 /* Argument support functions. */
2653 /* Return true when REGNO may be used to pass function parameters. */
2655 ix86_function_arg_regno_p (int regno)
2659 return (regno < REGPARM_MAX
2660 || (TARGET_MMX && MMX_REGNO_P (regno)
2661 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2662 || (TARGET_SSE && SSE_REGNO_P (regno)
2663 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2665 if (TARGET_SSE && SSE_REGNO_P (regno)
2666 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2668 /* RAX is used as a hidden argument to va_arg functions. */
2671 for (i = 0; i < REGPARM_MAX; i++)
2672 if (regno == x86_64_int_parameter_registers[i])
2677 /* Return true if we do not know how to pass TYPE solely in registers. */
2680 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2682 if (must_pass_in_stack_var_size_or_pad (mode, type))
2685 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2686 The layout_type routine is crafty and tries to trick us into passing
2687 currently unsupported vector types on the stack by using TImode. */
2688 return (!TARGET_64BIT && mode == TImode
2689 && type && TREE_CODE (type) != VECTOR_TYPE);
2692 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2693 for a call to a function whose data type is FNTYPE.
2694 For a library call, FNTYPE is 0. */
2697 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2698 tree fntype, /* tree ptr for function decl */
2699 rtx libname, /* SYMBOL_REF of library name or 0 */
2702 static CUMULATIVE_ARGS zero_cum;
2703 tree param, next_param;
2705 if (TARGET_DEBUG_ARG)
2707 fprintf (stderr, "\ninit_cumulative_args (");
2709 fprintf (stderr, "fntype code = %s, ret code = %s",
2710 tree_code_name[(int) TREE_CODE (fntype)],
2711 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2713 fprintf (stderr, "no fntype");
2716 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2721 /* Set up the number of registers to use for passing arguments. */
2722 cum->nregs = ix86_regparm;
2724 cum->sse_nregs = SSE_REGPARM_MAX;
2726 cum->mmx_nregs = MMX_REGPARM_MAX;
2727 cum->warn_sse = true;
2728 cum->warn_mmx = true;
2729 cum->maybe_vaarg = false;
2731 /* Use ecx and edx registers if the function has the fastcall attribute,
2732 else look for regparm information. */
2733 if (fntype && !TARGET_64BIT)
2735 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2741 cum->nregs = ix86_function_regparm (fntype, fndecl);
2744 /* Set up the number of SSE registers used for passing SFmode
2745 and DFmode arguments. Warn for mismatching ABI. */
2746 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2748 /* Determine if this function has variable arguments. This is
2749 indicated by the last argument being 'void_type_node' if there
2750 are no variable arguments. If there are variable arguments, then
2751 we won't pass anything in registers in 32-bit mode. */
2753 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2755 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2756 param != 0; param = next_param)
2758 next_param = TREE_CHAIN (param);
2759 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2769 cum->float_in_sse = 0;
2771 cum->maybe_vaarg = true;
2775 if ((!fntype && !libname)
2776 || (fntype && !TYPE_ARG_TYPES (fntype)))
2777 cum->maybe_vaarg = true;
2779 if (TARGET_DEBUG_ARG)
2780 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2785 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2786 But in the case of vector types, it is some vector mode.
2788 When we have only some of our vector isa extensions enabled, then there
2789 are some modes for which vector_mode_supported_p is false. For these
2790 modes, the generic vector support in gcc will choose some non-vector mode
2791 in order to implement the type. By computing the natural mode, we'll
2792 select the proper ABI location for the operand and not depend on whatever
2793 the middle-end decides to do with these vector types. */
2795 static enum machine_mode
2796 type_natural_mode (tree type)
2798 enum machine_mode mode = TYPE_MODE (type);
2800 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2802 HOST_WIDE_INT size = int_size_in_bytes (type);
2803 if ((size == 8 || size == 16)
2804 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2805 && TYPE_VECTOR_SUBPARTS (type) > 1)
2807 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2809 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2810 mode = MIN_MODE_VECTOR_FLOAT;
2812 mode = MIN_MODE_VECTOR_INT;
2814 /* Get the mode which has this inner mode and number of units. */
2815 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2816 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2817 && GET_MODE_INNER (mode) == innermode)
2827 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2828 this may not agree with the mode that the type system has chosen for the
2829 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2830 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2833 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2838 if (orig_mode != BLKmode)
2839 tmp = gen_rtx_REG (orig_mode, regno);
2842 tmp = gen_rtx_REG (mode, regno);
2843 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2844 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2850 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2851 of this code is to classify each 8-byte chunk of the incoming argument by register
2852 class and assign registers accordingly. */
2854 /* Return the union class of CLASS1 and CLASS2.
2855 See the x86-64 PS ABI for details. */
2857 static enum x86_64_reg_class
2858 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2860 /* Rule #1: If both classes are equal, this is the resulting class. */
2861 if (class1 == class2)
2864 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2866 if (class1 == X86_64_NO_CLASS)
2868 if (class2 == X86_64_NO_CLASS)
2871 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2872 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2873 return X86_64_MEMORY_CLASS;
2875 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2876 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2877 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2878 return X86_64_INTEGERSI_CLASS;
2879 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2880 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2881 return X86_64_INTEGER_CLASS;
2883 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2885 if (class1 == X86_64_X87_CLASS
2886 || class1 == X86_64_X87UP_CLASS
2887 || class1 == X86_64_COMPLEX_X87_CLASS
2888 || class2 == X86_64_X87_CLASS
2889 || class2 == X86_64_X87UP_CLASS
2890 || class2 == X86_64_COMPLEX_X87_CLASS)
2891 return X86_64_MEMORY_CLASS;
2893 /* Rule #6: Otherwise class SSE is used. */
2894 return X86_64_SSE_CLASS;
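/* Illustrative sketch (not part of GCC): struct { float f; int i; }
   occupies a single eightbyte; the float classifies as SSESF, the int
   as INTEGERSI, and rule #4 above merges the pair to INTEGERSI, so the
   whole struct travels in one general-purpose register.  */
#if 0
struct sf_si { float f; int i; };   /* 8 bytes, one eightbyte, one GPR */
#endif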
2897 /* Classify the argument of type TYPE and mode MODE.
2898 CLASSES will be filled by the register class used to pass each word
2899 of the operand. The number of words is returned. In case the parameter
2900 should be passed in memory, 0 is returned. As a special case for zero
2901 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2903 BIT_OFFSET is used internally for handling records and specifies the
2904 offset in bits, modulo 256, to avoid overflow cases.
2906 See the x86-64 PS ABI for details.
2910 classify_argument (enum machine_mode mode, tree type,
2911 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2913 HOST_WIDE_INT bytes =
2914 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2915 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2917 /* Variable sized entities are always passed/returned in memory. */
2921 if (mode != VOIDmode
2922 && targetm.calls.must_pass_in_stack (mode, type))
2925 if (type && AGGREGATE_TYPE_P (type))
2929 enum x86_64_reg_class subclasses[MAX_CLASSES];
2931 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2935 for (i = 0; i < words; i++)
2936 classes[i] = X86_64_NO_CLASS;
2938 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2939 signal the memory class, so handle this as a special case. */
2942 classes[0] = X86_64_NO_CLASS;
2946 /* Classify each field of the record and merge the classes. */
2947 switch (TREE_CODE (type))
2950 /* For classes, first merge in the fields of the base classes. */
2951 if (TYPE_BINFO (type))
2953 tree binfo, base_binfo;
2956 for (binfo = TYPE_BINFO (type), basenum = 0;
2957 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2960 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2961 tree type = BINFO_TYPE (base_binfo);
2963 num = classify_argument (TYPE_MODE (type),
2965 (offset + bit_offset) % 256);
2968 for (i = 0; i < num; i++)
2970 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2972 merge_classes (subclasses[i], classes[i + pos]);
2976 /* And now merge the fields of the structure. */
2977 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2979 if (TREE_CODE (field) == FIELD_DECL)
2983 if (TREE_TYPE (field) == error_mark_node)
2986 /* Bitfields are always classified as integer. Handle them
2987 early, since later code would consider them to be
2988 misaligned integers. */
2989 if (DECL_BIT_FIELD (field))
2991 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2992 i < ((int_bit_position (field) + (bit_offset % 64))
2993 + tree_low_cst (DECL_SIZE (field), 0)
2996 merge_classes (X86_64_INTEGER_CLASS,
3001 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3002 TREE_TYPE (field), subclasses,
3003 (int_bit_position (field)
3004 + bit_offset) % 256);
3007 for (i = 0; i < num; i++)
3010 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3012 merge_classes (subclasses[i], classes[i + pos]);
3020 /* Arrays are handled as small records. */
3023 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3024 TREE_TYPE (type), subclasses, bit_offset);
3028 /* The partial classes are now full classes. */
3029 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3030 subclasses[0] = X86_64_SSE_CLASS;
3031 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3032 subclasses[0] = X86_64_INTEGER_CLASS;
3034 for (i = 0; i < words; i++)
3035 classes[i] = subclasses[i % num];
3040 case QUAL_UNION_TYPE:
3041 /* Unions are similar to RECORD_TYPE but the offset is always 0.
3044 /* Unions are not derived. */
3045 gcc_assert (!TYPE_BINFO (type)
3046 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3047 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3049 if (TREE_CODE (field) == FIELD_DECL)
3053 if (TREE_TYPE (field) == error_mark_node)
3056 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3057 TREE_TYPE (field), subclasses,
3061 for (i = 0; i < num; i++)
3062 classes[i] = merge_classes (subclasses[i], classes[i]);
3071 /* Final merger cleanup. */
3072 for (i = 0; i < words; i++)
3074 /* If one class is MEMORY, everything should be passed in
3076 if (classes[i] == X86_64_MEMORY_CLASS)
3079 /* The X86_64_SSEUP_CLASS should always be preceded by
3080 X86_64_SSE_CLASS. */
3081 if (classes[i] == X86_64_SSEUP_CLASS
3082 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3083 classes[i] = X86_64_SSE_CLASS;
3085 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3086 if (classes[i] == X86_64_X87UP_CLASS
3087 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3088 classes[i] = X86_64_SSE_CLASS;
3093 /* Compute the alignment needed. We align all types to natural boundaries with
3094 the exception of XFmode, which is aligned to 64 bits. */
3095 if (mode != VOIDmode && mode != BLKmode)
3097 int mode_alignment = GET_MODE_BITSIZE (mode);
3100 mode_alignment = 128;
3101 else if (mode == XCmode)
3102 mode_alignment = 256;
3103 if (COMPLEX_MODE_P (mode))
3104 mode_alignment /= 2;
3105 /* Misaligned fields are always returned in memory. */
3106 if (bit_offset % mode_alignment)
3110 /* For V1xx modes, just use the base mode. */
3111 if (VECTOR_MODE_P (mode)
3112 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3113 mode = GET_MODE_INNER (mode);
3115 /* Classification of atomic types. */
3120 classes[0] = X86_64_SSE_CLASS;
3123 classes[0] = X86_64_SSE_CLASS;
3124 classes[1] = X86_64_SSEUP_CLASS;
3133 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3134 classes[0] = X86_64_INTEGERSI_CLASS;
3136 classes[0] = X86_64_INTEGER_CLASS;
3140 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3145 if (!(bit_offset % 64))
3146 classes[0] = X86_64_SSESF_CLASS;
3148 classes[0] = X86_64_SSE_CLASS;
3151 classes[0] = X86_64_SSEDF_CLASS;
3154 classes[0] = X86_64_X87_CLASS;
3155 classes[1] = X86_64_X87UP_CLASS;
3158 classes[0] = X86_64_SSE_CLASS;
3159 classes[1] = X86_64_SSEUP_CLASS;
3162 classes[0] = X86_64_SSE_CLASS;
3165 classes[0] = X86_64_SSEDF_CLASS;
3166 classes[1] = X86_64_SSEDF_CLASS;
3169 classes[0] = X86_64_COMPLEX_X87_CLASS;
3172 /* These modes are larger than 16 bytes. */
3180 classes[0] = X86_64_SSE_CLASS;
3181 classes[1] = X86_64_SSEUP_CLASS;
3187 classes[0] = X86_64_SSE_CLASS;
3193 gcc_assert (VECTOR_MODE_P (mode));
3198 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3200 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3201 classes[0] = X86_64_INTEGERSI_CLASS;
3203 classes[0] = X86_64_INTEGER_CLASS;
3204 classes[1] = X86_64_INTEGER_CLASS;
3205 return 1 + (bytes > 8);
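/* Illustrative sketch (not part of GCC): classifying a 16-byte struct.
   The first eightbyte (the double) gets X86_64_SSEDF_CLASS and the
   second (the int) gets X86_64_INTEGERSI_CLASS, so the argument is
   passed in one SSE register plus one general-purpose register.  */
#if 0
struct d_si { double d; int i; };   /* words: SSEDF, INTEGERSI */
#endif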
3209 /* Examine the argument and set the number of registers required in each
3210 class. Return 0 iff the parameter should be passed in memory. */
3212 examine_argument (enum machine_mode mode, tree type, int in_return,
3213 int *int_nregs, int *sse_nregs)
3215 enum x86_64_reg_class class[MAX_CLASSES];
3216 int n = classify_argument (mode, type, class, 0);
3222 for (n--; n >= 0; n--)
3225 case X86_64_INTEGER_CLASS:
3226 case X86_64_INTEGERSI_CLASS:
3229 case X86_64_SSE_CLASS:
3230 case X86_64_SSESF_CLASS:
3231 case X86_64_SSEDF_CLASS:
3234 case X86_64_NO_CLASS:
3235 case X86_64_SSEUP_CLASS:
3237 case X86_64_X87_CLASS:
3238 case X86_64_X87UP_CLASS:
3242 case X86_64_COMPLEX_X87_CLASS:
3243 return in_return ? 2 : 0;
3244 case X86_64_MEMORY_CLASS:
3250 /* Construct a container for the argument as used by the GCC interface. See
3251 FUNCTION_ARG for the detailed description. */
3254 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3255 tree type, int in_return, int nintregs, int nsseregs,
3256 const int *intreg, int sse_regno)
3258 /* The following variables hold the static issued_error state. */
3259 static bool issued_sse_arg_error;
3260 static bool issued_sse_ret_error;
3261 static bool issued_x87_ret_error;
3263 enum machine_mode tmpmode;
3265 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3266 enum x86_64_reg_class class[MAX_CLASSES];
3270 int needed_sseregs, needed_intregs;
3271 rtx exp[MAX_CLASSES];
3274 n = classify_argument (mode, type, class, 0);
3275 if (TARGET_DEBUG_ARG)
3278 fprintf (stderr, "Memory class\n");
3281 fprintf (stderr, "Classes:");
3282 for (i = 0; i < n; i++)
3284 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3286 fprintf (stderr, "\n");
3291 if (!examine_argument (mode, type, in_return, &needed_intregs,
3294 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3297 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3298 some less clueful developer tries to use floating-point anyway. */
3299 if (needed_sseregs && !TARGET_SSE)
3303 if (!issued_sse_ret_error)
3305 error ("SSE register return with SSE disabled");
3306 issued_sse_ret_error = true;
3309 else if (!issued_sse_arg_error)
3311 error ("SSE register argument with SSE disabled");
3312 issued_sse_arg_error = true;
3317 /* Likewise, error if the ABI requires us to return values in the
3318 x87 registers and the user specified -mno-80387. */
3319 if (!TARGET_80387 && in_return)
3320 for (i = 0; i < n; i++)
3321 if (class[i] == X86_64_X87_CLASS
3322 || class[i] == X86_64_X87UP_CLASS
3323 || class[i] == X86_64_COMPLEX_X87_CLASS)
3325 if (!issued_x87_ret_error)
3327 error ("x87 register return with x87 disabled");
3328 issued_x87_ret_error = true;
3333 /* First construct simple cases. Avoid SCmode, since we want to use
3334 a single register to pass this type. */
3335 if (n == 1 && mode != SCmode)
3338 case X86_64_INTEGER_CLASS:
3339 case X86_64_INTEGERSI_CLASS:
3340 return gen_rtx_REG (mode, intreg[0]);
3341 case X86_64_SSE_CLASS:
3342 case X86_64_SSESF_CLASS:
3343 case X86_64_SSEDF_CLASS:
3344 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3345 case X86_64_X87_CLASS:
3346 case X86_64_COMPLEX_X87_CLASS:
3347 return gen_rtx_REG (mode, FIRST_STACK_REG);
3348 case X86_64_NO_CLASS:
3349 /* Zero sized array, struct or class. */
3354 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3356 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3358 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3359 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3360 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3361 && class[1] == X86_64_INTEGER_CLASS
3362 && (mode == CDImode || mode == TImode || mode == TFmode)
3363 && intreg[0] + 1 == intreg[1])
3364 return gen_rtx_REG (mode, intreg[0]);
3366 /* Otherwise figure out the entries of the PARALLEL. */
3367 for (i = 0; i < n; i++)
3371 case X86_64_NO_CLASS:
3373 case X86_64_INTEGER_CLASS:
3374 case X86_64_INTEGERSI_CLASS:
3375 /* Merge TImodes on aligned occasions here too. */
3376 if (i * 8 + 8 > bytes)
3377 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3378 else if (class[i] == X86_64_INTEGERSI_CLASS)
3382 /* We've requested 24 bytes for which we have no mode. Use DImode. */
3383 if (tmpmode == BLKmode)
3385 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3386 gen_rtx_REG (tmpmode, *intreg),
3390 case X86_64_SSESF_CLASS:
3391 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3392 gen_rtx_REG (SFmode,
3393 SSE_REGNO (sse_regno)),
3397 case X86_64_SSEDF_CLASS:
3398 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3399 gen_rtx_REG (DFmode,
3400 SSE_REGNO (sse_regno)),
3404 case X86_64_SSE_CLASS:
3405 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3409 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3410 gen_rtx_REG (tmpmode,
3411 SSE_REGNO (sse_regno)),
3413 if (tmpmode == TImode)
3422 /* Empty aligned struct, union or class. */
3426 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3427 for (i = 0; i < nexps; i++)
3428 XVECEXP (ret, 0, i) = exp [i];
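/* For the struct { double d; int i; } example above, the resulting
   PARALLEL looks roughly like

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:SI di)  (const_int 8))])

   where each EXPR_LIST pairs the transporting register with the byte
   offset of the piece it carries.  */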
3432 /* Update the data in CUM to advance over an argument
3433 of mode MODE and data type TYPE.
3434 (TYPE is null for libcalls where that information may not be available.) */
3437 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3438 tree type, int named)
3441 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3442 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3445 mode = type_natural_mode (type);
3447 if (TARGET_DEBUG_ARG)
3448 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3449 "mode=%s, named=%d)\n\n",
3450 words, cum->words, cum->nregs, cum->sse_nregs,
3451 GET_MODE_NAME (mode), named);
3455 int int_nregs, sse_nregs;
3456 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3457 cum->words += words;
3458 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3460 cum->nregs -= int_nregs;
3461 cum->sse_nregs -= sse_nregs;
3462 cum->regno += int_nregs;
3463 cum->sse_regno += sse_nregs;
3466 cum->words += words;
3484 cum->words += words;
3485 cum->nregs -= words;
3486 cum->regno += words;
3488 if (cum->nregs <= 0)
3496 if (cum->float_in_sse < 2)
3499 if (cum->float_in_sse < 1)
3510 if (!type || !AGGREGATE_TYPE_P (type))
3512 cum->sse_words += words;
3513 cum->sse_nregs -= 1;
3514 cum->sse_regno += 1;
3515 if (cum->sse_nregs <= 0)
3527 if (!type || !AGGREGATE_TYPE_P (type))
3529 cum->mmx_words += words;
3530 cum->mmx_nregs -= 1;
3531 cum->mmx_regno += 1;
3532 if (cum->mmx_nregs <= 0)
3543 /* Define where to put the arguments to a function.
3544 Value is zero to push the argument on the stack,
3545 or a hard register in which to store the argument.
3547 MODE is the argument's machine mode.
3548 TYPE is the data type of the argument (as a tree).
3549 This is null for libcalls where that information may
3551 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3552 the preceding args and about the function being called.
3553 NAMED is nonzero if this argument is a named parameter
3554 (otherwise it is an extra parameter matching an ellipsis). */
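/* For reference, a sketch of how the middle end drives these hooks when
   expanding a call (the real loop lives in calls.c; details elided):

       INIT_CUMULATIVE_ARGS (cum, fntype, libname, fndecl, nargs);
       for (each argument)
         {
           reg = FUNCTION_ARG (cum, mode, type, named);
           if (reg)  ... load the argument into REG ...
           else      ... push the argument on the stack ...
           FUNCTION_ARG_ADVANCE (cum, mode, type, named);
         }  */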
3557 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3558 tree type, int named)
3560 enum machine_mode mode = orig_mode;
3563 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3564 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3565 static bool warnedsse, warnedmmx;
3567 /* To simplify the code below, represent vector types with a vector mode
3568 even if MMX/SSE are not active. */
3569 if (type && TREE_CODE (type) == VECTOR_TYPE)
3570 mode = type_natural_mode (type);
3572 /* Handle a hidden AL argument containing number of registers for varargs
3573 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid any AL settings.
3575 if (mode == VOIDmode)
3578 return GEN_INT (cum->maybe_vaarg
3579 ? (cum->sse_nregs < 0
3587 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3589 &x86_64_int_parameter_registers [cum->regno],
3594 /* For now, pass fp/complex values on the stack. */
3606 if (words <= cum->nregs)
3608 int regno = cum->regno;
3610 /* Fastcall allocates the first two DWORD (SImode) or
3611 smaller arguments to ECX and EDX. */
3614 if (mode == BLKmode || mode == DImode)
3617 /* ECX, not EAX, is the first allocated register. */
3621 ret = gen_rtx_REG (mode, regno);
3625 if (cum->float_in_sse < 2)
3628 if (cum->float_in_sse < 1)
3638 if (!type || !AGGREGATE_TYPE_P (type))
3640 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3643 warning (0, "SSE vector argument without SSE enabled "
3644 "changes the ABI");
3647 ret = gen_reg_or_parallel (mode, orig_mode,
3648 cum->sse_regno + FIRST_SSE_REG);
3655 if (!type || !AGGREGATE_TYPE_P (type))
3657 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3660 warning (0, "MMX vector argument without MMX enabled "
3661 "changes the ABI");
3664 ret = gen_reg_or_parallel (mode, orig_mode,
3665 cum->mmx_regno + FIRST_MMX_REG);
3670 if (TARGET_DEBUG_ARG)
3673 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3674 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3677 print_simple_rtl (stderr, ret);
3679 fprintf (stderr, ", stack");
3681 fprintf (stderr, " )\n");
3687 /* A C expression that indicates when an argument must be passed by
3688 reference. If nonzero for an argument, a copy of that argument is
3689 made in memory and a pointer to the argument is passed instead of
3690 the argument itself. The pointer is passed in whatever way is
3691 appropriate for passing a pointer to that type. */
3694 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3695 enum machine_mode mode ATTRIBUTE_UNUSED,
3696 tree type, bool named ATTRIBUTE_UNUSED)
3701 if (type && int_size_in_bytes (type) == -1)
3703 if (TARGET_DEBUG_ARG)
3704 fprintf (stderr, "function_arg_pass_by_reference\n");
3711 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3712 ABI. Only called if TARGET_SSE. */
3714 contains_128bit_aligned_vector_p (tree type)
3716 enum machine_mode mode = TYPE_MODE (type);
3717 if (SSE_REG_MODE_P (mode)
3718 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3720 if (TYPE_ALIGN (type) < 128)
3723 if (AGGREGATE_TYPE_P (type))
3725 /* Walk the aggregates recursively. */
3726 switch (TREE_CODE (type))
3730 case QUAL_UNION_TYPE:
3734 if (TYPE_BINFO (type))
3736 tree binfo, base_binfo;
3739 for (binfo = TYPE_BINFO (type), i = 0;
3740 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3741 if (contains_128bit_aligned_vector_p
3742 (BINFO_TYPE (base_binfo)))
3745 /* And now merge the fields of structure. */
3746 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3748 if (TREE_CODE (field) == FIELD_DECL
3749 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3756 /* Just for use if some languages pass arrays by value. */
3757 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3768 /* Gives the alignment boundary, in bits, of an argument with the
3769 specified mode and type. */
3772 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3776 align = TYPE_ALIGN (type);
3778 align = GET_MODE_ALIGNMENT (mode);
3779 if (align < PARM_BOUNDARY)
3780 align = PARM_BOUNDARY;
3783 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3784 make an exception for SSE modes since these require 128bit alignment.
3787 The handling here differs from field_alignment. ICC aligns MMX
3788 arguments to 4 byte boundaries, while structure fields are aligned
3789 to 8 byte boundaries. */
3791 align = PARM_BOUNDARY;
3794 if (!SSE_REG_MODE_P (mode))
3795 align = PARM_BOUNDARY;
3799 if (!contains_128bit_aligned_vector_p (type))
3800 align = PARM_BOUNDARY;
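/* For example, a plain int argument stays at the 32-bit PARM_BOUNDARY,
   while a __m128 argument, or an aggregate containing one (detected by
   contains_128bit_aligned_vector_p above), is bumped to 128 bits.  */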
3808 /* Return true if N is a possible register number of function value. */
3810 ix86_function_value_regno_p (int regno)
3813 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3814 || (regno == FIRST_SSE_REG && TARGET_SSE))
3818 && (regno == FIRST_MMX_REG && TARGET_MMX))
3824 /* Define how to find the value returned by a function.
3825 VALTYPE is the data type of the value (as a tree).
3826 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3827 otherwise, FUNC is 0. */
3829 ix86_function_value (tree valtype, tree fntype_or_decl,
3830 bool outgoing ATTRIBUTE_UNUSED)
3832 enum machine_mode natmode = type_natural_mode (valtype);
3836 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3837 1, REGPARM_MAX, SSE_REGPARM_MAX,
3838 x86_64_int_return_registers, 0);
3839 /* For zero sized structures, construct_container returns NULL, but we
3840 need to keep the rest of the compiler happy by returning a meaningful value. */
3842 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3847 tree fn = NULL_TREE, fntype;
3849 && DECL_P (fntype_or_decl))
3850 fn = fntype_or_decl;
3851 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3852 return gen_rtx_REG (TYPE_MODE (valtype),
3853 ix86_value_regno (natmode, fn, fntype));
3857 /* Return true iff type is returned in memory. */
3859 ix86_return_in_memory (tree type)
3861 int needed_intregs, needed_sseregs, size;
3862 enum machine_mode mode = type_natural_mode (type);
3865 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3867 if (mode == BLKmode)
3870 size = int_size_in_bytes (type);
3872 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3875 if (VECTOR_MODE_P (mode) || mode == TImode)
3877 /* User-created vectors small enough to fit in EAX. */
3881 /* MMX/3dNow values are returned in MM0,
3882 except when it doesn't exist. */
3884 return (TARGET_MMX ? 0 : 1);
3886 /* SSE values are returned in XMM0, except when it doesn't exist. */
3888 return (TARGET_SSE ? 0 : 1);
3902 /* When returning SSE vector types, we have a choice of either
3903 (1) being ABI incompatible with a -march switch, or
3904 (2) generating an error.
3905 Given no good solution, I think the safest thing is one warning.
3906 The user won't be able to use -Werror, but....
3908 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3909 called in response to actually generating a caller or callee that
3910 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3911 via aggregate_value_p for general type probing from tree-ssa. */
3914 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3916 static bool warnedsse, warnedmmx;
3920 /* Look at the return type of the function, not the function type. */
3921 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3923 if (!TARGET_SSE && !warnedsse)
3926 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3929 warning (0, "SSE vector return without SSE enabled "
3930 "changes the ABI");
3934 if (!TARGET_MMX && !warnedmmx)
3936 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3939 warning (0, "MMX vector return without MMX enabled "
3940 "changes the ABI");
3948 /* Define how to find the value returned by a library function
3949 assuming the value has mode MODE. */
3951 ix86_libcall_value (enum machine_mode mode)
3965 return gen_rtx_REG (mode, FIRST_SSE_REG);
3968 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3972 return gen_rtx_REG (mode, 0);
3976 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3979 /* Given a mode, return the register to use for a return value. */
3982 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3984 gcc_assert (!TARGET_64BIT);
3986 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3987 we normally prevent this case when mmx is not available. However
3988 some ABIs may require the result to be returned as if it had DImode. */
3989 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3990 return TARGET_MMX ? FIRST_MMX_REG : 0;
3992 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3993 we prevent this case when sse is not available. However some ABIs
3994 may require the result to be returned as if it had integer TImode. */
3995 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3996 return TARGET_SSE ? FIRST_SSE_REG : 0;
3998 /* Decimal floating point values can go in %eax, unlike other float modes. */
3999 if (DECIMAL_FLOAT_MODE_P (mode))
4002 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4003 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4006 /* Floating point return values in %st(0), except for local functions when
4007 SSE math is enabled or for functions with sseregparm attribute. */
4008 if ((func || fntype)
4009 && (mode == SFmode || mode == DFmode))
4011 int sse_level = ix86_function_sseregparm (fntype, func);
4012 if ((sse_level >= 1 && mode == SFmode)
4013 || (sse_level == 2 && mode == DFmode))
4014 return FIRST_SSE_REG;
4017 return FIRST_FLOAT_REG;
4020 /* Create the va_list data type. */
4023 ix86_build_builtin_va_list (void)
4025 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4027 /* For i386 we use plain pointer to argument area. */
4029 return build_pointer_type (char_type_node);
4031 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4032 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4034 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4035 unsigned_type_node);
4036 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4037 unsigned_type_node);
4038 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4040 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4043 va_list_gpr_counter_field = f_gpr;
4044 va_list_fpr_counter_field = f_fpr;
4046 DECL_FIELD_CONTEXT (f_gpr) = record;
4047 DECL_FIELD_CONTEXT (f_fpr) = record;
4048 DECL_FIELD_CONTEXT (f_ovf) = record;
4049 DECL_FIELD_CONTEXT (f_sav) = record;
4051 TREE_CHAIN (record) = type_decl;
4052 TYPE_NAME (record) = type_decl;
4053 TYPE_FIELDS (record) = f_gpr;
4054 TREE_CHAIN (f_gpr) = f_fpr;
4055 TREE_CHAIN (f_fpr) = f_ovf;
4056 TREE_CHAIN (f_ovf) = f_sav;
4058 layout_type (record);
4060 /* The correct type is an array type of one element. */
4061 return build_array_type (record, build_index_type (size_zero_node));
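/* For reference, the record built above matches the va_list layout that
   the x86-64 psABI specifies:

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;    // byte offset of next GP register slot
         unsigned int fp_offset;    // byte offset of next FP register slot
         void *overflow_arg_area;   // next stack-passed argument
         void *reg_save_area;       // base of the register save area
       } __builtin_va_list[1];  */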
4064 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4067 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4068 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4071 CUMULATIVE_ARGS next_cum;
4072 rtx save_area = NULL_RTX, mem;
4085 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4088 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4089 ix86_save_varrargs_registers = 1;
4091 cfun->stack_alignment_needed = 128;
4093 fntype = TREE_TYPE (current_function_decl);
4094 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4095 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4096 != void_type_node));
4098 /* For varargs, we do not want to skip the dummy va_dcl argument.
4099 For stdargs, we do want to skip the last named argument. */
4102 function_arg_advance (&next_cum, mode, type, 1);
4105 save_area = frame_pointer_rtx;
4107 set = get_varargs_alias_set ();
4109 for (i = next_cum.regno;
4111 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4114 mem = gen_rtx_MEM (Pmode,
4115 plus_constant (save_area, i * UNITS_PER_WORD));
4116 MEM_NOTRAP_P (mem) = 1;
4117 set_mem_alias_set (mem, set);
4118 emit_move_insn (mem, gen_rtx_REG (Pmode,
4119 x86_64_int_parameter_registers[i]));
4122 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4124 /* Now emit code to save SSE registers. The AX parameter contains the
4125 number of SSE parameter registers used to call this function. We use
4126 the sse_prologue_save insn template, which produces a computed jump
4127 across the SSE saves. We need some preparation work to get this working. */
4129 label = gen_label_rtx ();
4130 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4132 /* Compute the address to jump to:
4133 label - 5*eax + nnamed_sse_arguments*5. */
4134 tmp_reg = gen_reg_rtx (Pmode);
4135 nsse_reg = gen_reg_rtx (Pmode);
4136 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4137 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4138 gen_rtx_MULT (Pmode, nsse_reg,
4140 if (next_cum.sse_regno)
4143 gen_rtx_CONST (DImode,
4144 gen_rtx_PLUS (DImode,
4146 GEN_INT (next_cum.sse_regno * 4))));
4148 emit_move_insn (nsse_reg, label_ref);
4149 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4151 /* Compute the address of the memory block we save into. We always use a
4152 pointer pointing 127 bytes after the first byte to store, so that the
4153 offsets fit in signed 8-bit displacements and each save instruction stays within 4 bytes. */
4154 tmp_reg = gen_reg_rtx (Pmode);
4155 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4156 plus_constant (save_area,
4157 8 * REGPARM_MAX + 127)));
4158 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4159 MEM_NOTRAP_P (mem) = 1;
4160 set_mem_alias_set (mem, set);
4161 set_mem_align (mem, BITS_PER_WORD);
4163 /* And finally do the dirty job! */
4164 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4165 GEN_INT (next_cum.sse_regno), label));
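/* Why the 127-byte bias: with the block pointer biased this way every
   save addresses its slot with a signed 8-bit displacement, e.g.

       movaps %xmm0, -127(%rax)
       movaps %xmm1, -111(%rax)
       ...
       movaps %xmm7, -15(%rax)

   (a sketch; the actual operands come from the sse_prologue_save
   template), whereas unbiased offsets would need 32-bit displacements
   and break the fixed instruction size the computed jump relies on.  */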
4170 /* Implement va_start. */
4173 ix86_va_start (tree valist, rtx nextarg)
4175 HOST_WIDE_INT words, n_gpr, n_fpr;
4176 tree f_gpr, f_fpr, f_ovf, f_sav;
4177 tree gpr, fpr, ovf, sav, t;
4180 /* Only 64bit target needs something special. */
4183 std_expand_builtin_va_start (valist, nextarg);
4187 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4188 f_fpr = TREE_CHAIN (f_gpr);
4189 f_ovf = TREE_CHAIN (f_fpr);
4190 f_sav = TREE_CHAIN (f_ovf);
4192 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4193 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4194 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4195 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4196 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4198 /* Count number of gp and fp argument registers used. */
4199 words = current_function_args_info.words;
4200 n_gpr = current_function_args_info.regno;
4201 n_fpr = current_function_args_info.sse_regno;
4203 if (TARGET_DEBUG_ARG)
4204 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4205 (int) words, (int) n_gpr, (int) n_fpr);
4207 if (cfun->va_list_gpr_size)
4209 type = TREE_TYPE (gpr);
4210 t = build2 (MODIFY_EXPR, type, gpr,
4211 build_int_cst (type, n_gpr * 8));
4212 TREE_SIDE_EFFECTS (t) = 1;
4213 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4216 if (cfun->va_list_fpr_size)
4218 type = TREE_TYPE (fpr);
4219 t = build2 (MODIFY_EXPR, type, fpr,
4220 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4221 TREE_SIDE_EFFECTS (t) = 1;
4222 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4225 /* Find the overflow area. */
4226 type = TREE_TYPE (ovf);
4227 t = make_tree (type, virtual_incoming_args_rtx);
4229 t = build2 (PLUS_EXPR, type, t,
4230 build_int_cst (type, words * UNITS_PER_WORD));
4231 t = build2 (MODIFY_EXPR, type, ovf, t);
4232 TREE_SIDE_EFFECTS (t) = 1;
4233 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4235 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4237 /* Find the register save area.
4238 The function prologue saves it right above the stack frame. */
4239 type = TREE_TYPE (sav);
4240 t = make_tree (type, frame_pointer_rtx);
4241 t = build2 (MODIFY_EXPR, type, sav, t);
4242 TREE_SIDE_EFFECTS (t) = 1;
4243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
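/* Conceptually, for "void f (int n, ...)" the expansion above amounts to
   (a sketch in terms of the __va_list_tag fields):

       ap->gp_offset = n_gpr * 8;
       ap->fp_offset = REGPARM_MAX * 8 + n_fpr * 16;
       ap->overflow_arg_area = <incoming args> + words * UNITS_PER_WORD;
       ap->reg_save_area = <register save block in the frame>;  */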
4247 /* Implement va_arg. */
4250 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4252 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4253 tree f_gpr, f_fpr, f_ovf, f_sav;
4254 tree gpr, fpr, ovf, sav, t;
4256 tree lab_false, lab_over = NULL_TREE;
4261 enum machine_mode nat_mode;
4263 /* Only 64bit target needs something special. */
4265 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4267 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4268 f_fpr = TREE_CHAIN (f_gpr);
4269 f_ovf = TREE_CHAIN (f_fpr);
4270 f_sav = TREE_CHAIN (f_ovf);
4272 valist = build_va_arg_indirect_ref (valist);
4273 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4274 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4275 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4276 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4278 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4280 type = build_pointer_type (type);
4281 size = int_size_in_bytes (type);
4282 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4284 nat_mode = type_natural_mode (type);
4285 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4286 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4288 /* Pull the value out of the saved registers. */
4290 addr = create_tmp_var (ptr_type_node, "addr");
4291 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4295 int needed_intregs, needed_sseregs;
4297 tree int_addr, sse_addr;
4299 lab_false = create_artificial_label ();
4300 lab_over = create_artificial_label ();
4302 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4304 need_temp = (!REG_P (container)
4305 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4306 || TYPE_ALIGN (type) > 128));
4308 /* If we are passing a structure, verify that it is a consecutive block
4309 in the register save area. If not, we need to do moves. */
4310 if (!need_temp && !REG_P (container))
4312 /* Verify that all registers are strictly consecutive. */
4313 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4317 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4319 rtx slot = XVECEXP (container, 0, i);
4320 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4321 || INTVAL (XEXP (slot, 1)) != i * 16)
4329 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4331 rtx slot = XVECEXP (container, 0, i);
4332 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4333 || INTVAL (XEXP (slot, 1)) != i * 8)
4345 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4346 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4347 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4348 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4351 /* First ensure that we fit completely in registers. */
4354 t = build_int_cst (TREE_TYPE (gpr),
4355 (REGPARM_MAX - needed_intregs + 1) * 8);
4356 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4357 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4358 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4359 gimplify_and_add (t, pre_p);
4363 t = build_int_cst (TREE_TYPE (fpr),
4364 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4366 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4367 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4368 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4369 gimplify_and_add (t, pre_p);
4372 /* Compute index to start of area used for integer regs. */
4375 /* int_addr = gpr + sav; */
4376 t = fold_convert (ptr_type_node, gpr);
4377 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4378 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4379 gimplify_and_add (t, pre_p);
4383 /* sse_addr = fpr + sav; */
4384 t = fold_convert (ptr_type_node, fpr);
4385 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4386 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4387 gimplify_and_add (t, pre_p);
4392 tree temp = create_tmp_var (type, "va_arg_tmp");
4395 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4396 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4397 gimplify_and_add (t, pre_p);
4399 for (i = 0; i < XVECLEN (container, 0); i++)
4401 rtx slot = XVECEXP (container, 0, i);
4402 rtx reg = XEXP (slot, 0);
4403 enum machine_mode mode = GET_MODE (reg);
4404 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4405 tree addr_type = build_pointer_type (piece_type);
4408 tree dest_addr, dest;
4410 if (SSE_REGNO_P (REGNO (reg)))
4412 src_addr = sse_addr;
4413 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4417 src_addr = int_addr;
4418 src_offset = REGNO (reg) * 8;
4420 src_addr = fold_convert (addr_type, src_addr);
4421 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4422 size_int (src_offset)));
4423 src = build_va_arg_indirect_ref (src_addr);
4425 dest_addr = fold_convert (addr_type, addr);
4426 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4427 size_int (INTVAL (XEXP (slot, 1)))));
4428 dest = build_va_arg_indirect_ref (dest_addr);
4430 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4431 gimplify_and_add (t, pre_p);
4437 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4438 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4439 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4440 gimplify_and_add (t, pre_p);
4444 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4445 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4446 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4447 gimplify_and_add (t, pre_p);
4450 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4451 gimplify_and_add (t, pre_p);
4453 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4454 append_to_statement_list (t, pre_p);
4457 /* ... otherwise out of the overflow area. */
4459 /* Care for on-stack alignment if needed. */
4460 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4461 || integer_zerop (TYPE_SIZE (type)))
4465 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4466 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4467 build_int_cst (TREE_TYPE (ovf), align - 1));
4468 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4469 build_int_cst (TREE_TYPE (t), -align));
4471 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4473 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4474 gimplify_and_add (t2, pre_p);
4476 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4477 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4478 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4479 gimplify_and_add (t, pre_p);
4483 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4484 append_to_statement_list (t, pre_p);
4487 ptrtype = build_pointer_type (type);
4488 addr = fold_convert (ptrtype, addr);
4491 addr = build_va_arg_indirect_ref (addr);
4492 return build_va_arg_indirect_ref (addr);
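/* The generated fast path for, say, "va_arg (ap, int)" therefore behaves
   like (a sketch; the real thing is emitted as GIMPLE above):

       if (ap->gp_offset >= REGPARM_MAX * 8)
         goto overflow;                  // fetch from the stack instead
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       result = *(int *) addr;  */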
4495 /* Return nonzero if OPNUM's MEM should be matched
4496 in movabs* patterns. */
4499 ix86_check_movabs (rtx insn, int opnum)
4503 set = PATTERN (insn);
4504 if (GET_CODE (set) == PARALLEL)
4505 set = XVECEXP (set, 0, 0);
4506 gcc_assert (GET_CODE (set) == SET);
4507 mem = XEXP (set, opnum);
4508 while (GET_CODE (mem) == SUBREG)
4509 mem = SUBREG_REG (mem);
4510 gcc_assert (GET_CODE (mem) == MEM);
4511 return (volatile_ok || !MEM_VOLATILE_P (mem));
4514 /* Initialize the table of extra 80387 mathematical constants. */
4517 init_ext_80387_constants (void)
4519 static const char * cst[5] =
4521 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4522 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4523 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4524 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4525 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4529 for (i = 0; i < 5; i++)
4531 real_from_string (&ext_80387_constants_table[i], cst[i]);
4532 /* Ensure each constant is rounded to XFmode precision. */
4533 real_convert (&ext_80387_constants_table[i],
4534 XFmode, &ext_80387_constants_table[i]);
4537 ext_80387_constants_init = 1;
4540 /* Return true if the constant is something that can be loaded with
4541 a special instruction. */
4544 standard_80387_constant_p (rtx x)
4546 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4549 if (x == CONST0_RTX (GET_MODE (x)))
4551 if (x == CONST1_RTX (GET_MODE (x)))
4554 /* For XFmode constants, try to find a special 80387 instruction when
4555 optimizing for size or on those CPUs that benefit from them. */
4556 if (GET_MODE (x) == XFmode
4557 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4562 if (! ext_80387_constants_init)
4563 init_ext_80387_constants ();
4565 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4566 for (i = 0; i < 5; i++)
4567 if (real_identical (&r, &ext_80387_constants_table[i]))
4574 /* Return the opcode of the special instruction to be used to load the constant X. */
4578 standard_80387_constant_opcode (rtx x)
4580 switch (standard_80387_constant_p (x))
4601 /* Return the CONST_DOUBLE representing the 80387 constant that is
4602 loaded by the specified special instruction. The argument IDX
4603 matches the return value from standard_80387_constant_p. */
4606 standard_80387_constant_rtx (int idx)
4610 if (! ext_80387_constants_init)
4611 init_ext_80387_constants ();
4627 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4631 /* Return 1 if MODE is a valid mode for SSE. */
4633 standard_sse_mode_p (enum machine_mode mode)
4650 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4653 standard_sse_constant_p (rtx x)
4655 enum machine_mode mode = GET_MODE (x);
4657 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4659 if (vector_all_ones_operand (x, mode)
4660 && standard_sse_mode_p (mode))
4661 return TARGET_SSE2 ? 2 : -1;
4666 /* Return the opcode of the special instruction to be used to load the constant X. */
4670 standard_sse_constant_opcode (rtx insn, rtx x)
4672 switch (standard_sse_constant_p (x))
4675 if (get_attr_mode (insn) == MODE_V4SF)
4676 return "xorps\t%0, %0";
4677 else if (get_attr_mode (insn) == MODE_V2DF)
4678 return "xorpd\t%0, %0";
4680 return "pxor\t%0, %0";
4682 return "pcmpeqd\t%0, %0";
4687 /* Returns 1 if OP contains a symbol reference. */
4690 symbolic_reference_mentioned_p (rtx op)
4695 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4698 fmt = GET_RTX_FORMAT (GET_CODE (op));
4699 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4705 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4706 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4710 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4717 /* Return 1 if it is appropriate to emit `ret' instructions in the
4718 body of a function. Do this only if the epilogue is simple, needing a
4719 couple of insns. Prior to reloading, we can't tell how many registers
4720 must be saved, so return 0 then. Return 0 if there is no frame
4721 marker to de-allocate. */
4724 ix86_can_use_return_insn_p (void)
4726 struct ix86_frame frame;
4728 if (! reload_completed || frame_pointer_needed)
4731 /* Don't allow more than 32k bytes of arguments to be popped, since
4732 that's all we can do with one instruction. */
4733 if (current_function_pops_args
4734 && current_function_args_size >= 32768)
4737 ix86_compute_frame_layout (&frame);
4738 return frame.to_allocate == 0 && frame.nregs == 0;
4741 /* Value should be nonzero if functions must have frame pointers.
4742 Zero means the frame pointer need not be set up (and parms may
4743 be accessed via the stack pointer) in functions that seem suitable. */
4746 ix86_frame_pointer_required (void)
4748 /* If we accessed previous frames, then the generated code expects
4749 to be able to access the saved ebp value in our frame. */
4750 if (cfun->machine->accesses_prev_frame)
4753 /* Several x86 OSes need a frame pointer for other reasons,
4754 usually pertaining to setjmp. */
4755 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4758 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4759 the frame pointer by default. Turn it back on now if we've not
4760 got a leaf function. */
4761 if (TARGET_OMIT_LEAF_FRAME_POINTER
4762 && (!current_function_is_leaf
4763 || ix86_current_function_calls_tls_descriptor))
4766 if (current_function_profile)
4772 /* Record that the current function accesses previous call frames. */
4775 ix86_setup_frame_addresses (void)
4777 cfun->machine->accesses_prev_frame = 1;
4780 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4781 # define USE_HIDDEN_LINKONCE 1
4783 # define USE_HIDDEN_LINKONCE 0
4786 static int pic_labels_used;
4788 /* Fills in the label name that should be used for a pc thunk for
4789 the given register. */
4792 get_pc_thunk_name (char name[32], unsigned int regno)
4794 gcc_assert (!TARGET_64BIT);
4796 if (USE_HIDDEN_LINKONCE)
4797 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4799 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4803 /* This function generates code for -fpic that loads %ebx with
4804 the return address of the caller and then returns. */
4807 ix86_file_end (void)
4812 for (regno = 0; regno < 8; ++regno)
4816 if (! ((pic_labels_used >> regno) & 1))
4819 get_pc_thunk_name (name, regno);
4824 switch_to_section (darwin_sections[text_coal_section]);
4825 fputs ("\t.weak_definition\t", asm_out_file);
4826 assemble_name (asm_out_file, name);
4827 fputs ("\n\t.private_extern\t", asm_out_file);
4828 assemble_name (asm_out_file, name);
4829 fputs ("\n", asm_out_file);
4830 ASM_OUTPUT_LABEL (asm_out_file, name);
4834 if (USE_HIDDEN_LINKONCE)
4838 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4840 TREE_PUBLIC (decl) = 1;
4841 TREE_STATIC (decl) = 1;
4842 DECL_ONE_ONLY (decl) = 1;
4844 (*targetm.asm_out.unique_section) (decl, 0);
4845 switch_to_section (get_named_section (decl, NULL, 0));
4847 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4848 fputs ("\t.hidden\t", asm_out_file);
4849 assemble_name (asm_out_file, name);
4850 fputc ('\n', asm_out_file);
4851 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4855 switch_to_section (text_section);
4856 ASM_OUTPUT_LABEL (asm_out_file, name);
4859 xops[0] = gen_rtx_REG (SImode, regno);
4860 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4861 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4862 output_asm_insn ("ret", xops);
4865 if (NEED_INDICATE_EXEC_STACK)
4866 file_end_indicate_exec_stack ();
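/* Each emitted thunk simply copies the return address into its register
   and returns; for %ebx the body is just:

       __i686.get_pc_thunk.bx:
           movl (%esp), %ebx
           ret  */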
4869 /* Emit code for the SET_GOT patterns. */
4872 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4877 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4879 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4881 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4884 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4886 output_asm_insn ("call\t%a2", xops);
4889 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4890 is what will be referenced by the Mach-O PIC subsystem. */
4892 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4895 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4896 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4899 output_asm_insn ("pop{l}\t%0", xops);
4904 get_pc_thunk_name (name, REGNO (dest));
4905 pic_labels_used |= 1 << REGNO (dest);
4907 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4908 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4909 output_asm_insn ("call\t%X2", xops);
4910 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4911 is what will be referenced by the Mach-O PIC subsystem. */
4914 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4916 targetm.asm_out.internal_label (asm_out_file, "L",
4917 CODE_LABEL_NUMBER (label));
4924 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4925 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4927 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
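/* So with deep branch prediction the emitted PIC setup is, e.g.:

       call __i686.get_pc_thunk.bx
       addl $_GLOBAL_OFFSET_TABLE_, %ebx

   while without it we get the classic call/pop sequence (a sketch; the
   label is generated above):

       call .L2
   .L2: popl %ebx
        addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */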
4932 /* Generate a "push" pattern for input ARG. */
4937 return gen_rtx_SET (VOIDmode,
4939 gen_rtx_PRE_DEC (Pmode,
4940 stack_pointer_rtx)),
4944 /* Return >= 0 if there is an unused call-clobbered register available
4945 for the entire function. */
4948 ix86_select_alt_pic_regnum (void)
4950 if (current_function_is_leaf && !current_function_profile
4951 && !ix86_current_function_calls_tls_descriptor)
4954 for (i = 2; i >= 0; --i)
4955 if (!regs_ever_live[i])
4959 return INVALID_REGNUM;
4962 /* Return 1 if we need to save REGNO. */
4964 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4966 if (pic_offset_table_rtx
4967 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4968 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4969 || current_function_profile
4970 || current_function_calls_eh_return
4971 || current_function_uses_const_pool))
4973 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4978 if (current_function_calls_eh_return && maybe_eh_return)
4983 unsigned test = EH_RETURN_DATA_REGNO (i);
4984 if (test == INVALID_REGNUM)
4991 if (cfun->machine->force_align_arg_pointer
4992 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4995 return (regs_ever_live[regno]
4996 && !call_used_regs[regno]
4997 && !fixed_regs[regno]
4998 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5001 /* Return number of registers to be saved on the stack. */
5004 ix86_nsaved_regs (void)
5009 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5010 if (ix86_save_reg (regno, true))
5015 /* Return the offset between two registers, one to be eliminated, and the other
5016 its replacement, at the start of a routine. */
5019 ix86_initial_elimination_offset (int from, int to)
5021 struct ix86_frame frame;
5022 ix86_compute_frame_layout (&frame);
5024 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5025 return frame.hard_frame_pointer_offset;
5026 else if (from == FRAME_POINTER_REGNUM
5027 && to == HARD_FRAME_POINTER_REGNUM)
5028 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5031 gcc_assert (to == STACK_POINTER_REGNUM);
5033 if (from == ARG_POINTER_REGNUM)
5034 return frame.stack_pointer_offset;
5036 gcc_assert (from == FRAME_POINTER_REGNUM);
5037 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5041 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
5044 ix86_compute_frame_layout (struct ix86_frame *frame)
5046 HOST_WIDE_INT total_size;
5047 unsigned int stack_alignment_needed;
5048 HOST_WIDE_INT offset;
5049 unsigned int preferred_alignment;
5050 HOST_WIDE_INT size = get_frame_size ();
5052 frame->nregs = ix86_nsaved_regs ();
5055 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5056 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5058 /* During the reload iteration the number of registers saved can change.
5059 Recompute the value as needed. Do not recompute it when the number of
5060 registers didn't change, as reload makes multiple calls to this function
5061 and does not expect the decision to change within a single iteration. */
5063 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5065 int count = frame->nregs;
5067 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5068 /* The fast prologue uses moves instead of pushes to save registers. This
5069 is significantly longer, but also executes faster, as modern hardware
5070 can execute the moves in parallel but can't do that for push/pop.
5072 Be careful about choosing which prologue to emit: when the function takes
5073 many instructions to execute, we may as well use the slow version, and
5074 likewise when the function is known to be outside a hot spot (known with
5075 profile feedback only). Weight the size of the function by the number of
5076 registers to save, as it is cheap to use one or two push instructions but
5077 very slow to use many of them. */
5079 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5080 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5081 || (flag_branch_probabilities
5082 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5083 cfun->machine->use_fast_prologue_epilogue = false;
5085 cfun->machine->use_fast_prologue_epilogue
5086 = !expensive_function_p (count);
5088 if (TARGET_PROLOGUE_USING_MOVE
5089 && cfun->machine->use_fast_prologue_epilogue)
5090 frame->save_regs_using_mov = true;
5092 frame->save_regs_using_mov = false;
5095 /* Skip return address and saved base pointer. */
5096 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5098 frame->hard_frame_pointer_offset = offset;
5100 /* Do some sanity checking of stack_alignment_needed and
5101 preferred_alignment, since the i386 port is the only one using these
5102 features, and they may break easily. */
5104 gcc_assert (!size || stack_alignment_needed);
5105 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5106 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5107 gcc_assert (stack_alignment_needed
5108 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5110 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5111 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5113 /* Register save area */
5114 offset += frame->nregs * UNITS_PER_WORD;
5117 if (ix86_save_varrargs_registers)
5119 offset += X86_64_VARARGS_SIZE;
5120 frame->va_arg_size = X86_64_VARARGS_SIZE;
5123 frame->va_arg_size = 0;
5125 /* Align start of frame for local function. */
5126 frame->padding1 = ((offset + stack_alignment_needed - 1)
5127 & -stack_alignment_needed) - offset;
5129 offset += frame->padding1;
5131 /* Frame pointer points here. */
5132 frame->frame_pointer_offset = offset;
5136 /* Add outgoing arguments area. Can be skipped if we eliminated
5137 all the function calls as dead code.
5138 Skipping it is however impossible when the function calls alloca, as the
5139 alloca expander assumes that the last current_function_outgoing_args_size
5140 bytes of the stack frame are unused. */
5141 if (ACCUMULATE_OUTGOING_ARGS
5142 && (!current_function_is_leaf || current_function_calls_alloca
5143 || ix86_current_function_calls_tls_descriptor))
5145 offset += current_function_outgoing_args_size;
5146 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5149 frame->outgoing_arguments_size = 0;
5151 /* Align stack boundary. Only needed if we're calling another function or using alloca. */
5153 if (!current_function_is_leaf || current_function_calls_alloca
5154 || ix86_current_function_calls_tls_descriptor)
5155 frame->padding2 = ((offset + preferred_alignment - 1)
5156 & -preferred_alignment) - offset;
5158 frame->padding2 = 0;
5160 offset += frame->padding2;
5162 /* We've reached end of stack frame. */
5163 frame->stack_pointer_offset = offset;
5165 /* Size prologue needs to allocate. */
5166 frame->to_allocate =
5167 (size + frame->padding1 + frame->padding2
5168 + frame->outgoing_arguments_size + frame->va_arg_size);
5170 if ((!frame->to_allocate && frame->nregs <= 1)
5171 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5172 frame->save_regs_using_mov = false;
5174 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5175 && current_function_is_leaf
5176 && !ix86_current_function_calls_tls_descriptor)
5178 frame->red_zone_size = frame->to_allocate;
5179 if (frame->save_regs_using_mov)
5180 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5181 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5182 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5185 frame->red_zone_size = 0;
5186 frame->to_allocate -= frame->red_zone_size;
5187 frame->stack_pointer_offset -= frame->red_zone_size;
5189 fprintf (stderr, "nregs: %i\n", frame->nregs);
5190 fprintf (stderr, "size: %i\n", size);
5191 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5192 fprintf (stderr, "padding1: %i\n", frame->padding1);
5193 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5194 fprintf (stderr, "padding2: %i\n", frame->padding2);
5195 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5196 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5197 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5198 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5199 frame->hard_frame_pointer_offset);
5200 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
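/* The resulting layout, from higher towards lower addresses, is (a sketch
   derived from the offsets computed above):

       <return address>
       [saved %ebp]          <- hard_frame_pointer_offset
       <saved registers>     (nregs * UNITS_PER_WORD)
       [varargs save area]   (va_arg_size)
       [padding1]            <- frame_pointer_offset
       <local variables>
       [outgoing arguments]
       [padding2]            <- stack_pointer_offset  */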
5204 /* Emit code to save registers in the prologue. */
5207 ix86_emit_save_regs (void)
5212 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5213 if (ix86_save_reg (regno, true))
5215 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5216 RTX_FRAME_RELATED_P (insn) = 1;
5220 /* Emit code to save registers using MOV insns. First register
5221 is stored at POINTER + OFFSET. */
5223 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5228 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5229 if (ix86_save_reg (regno, true))
5231 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5233 gen_rtx_REG (Pmode, regno));
5234 RTX_FRAME_RELATED_P (insn) = 1;
5235 offset += UNITS_PER_WORD;
5239 /* Expand prologue or epilogue stack adjustment.
5240 The pattern exists to put a dependency on all ebp-based memory accesses.
5241 STYLE should be negative if instructions should be marked as frame related,
5242 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
5246 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5251 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5252 else if (x86_64_immediate_operand (offset, DImode))
5253 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5257 /* r11 is used by indirect sibcall return as well, set before the
5258 epilogue and used after the epilogue. ATM indirect sibcall
5259 shouldn't be used together with huge frame sizes in one
5260 function because of the frame_size check in sibcall.c. */
5262 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5263 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5265 RTX_FRAME_RELATED_P (insn) = 1;
5266 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5270 RTX_FRAME_RELATED_P (insn) = 1;
5273 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5276 ix86_internal_arg_pointer (void)
5278 bool has_force_align_arg_pointer =
5279 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5280 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5281 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5282 && DECL_NAME (current_function_decl)
5283 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5284 && DECL_FILE_SCOPE_P (current_function_decl))
5285 || ix86_force_align_arg_pointer
5286 || has_force_align_arg_pointer)
5288 /* Nested functions can't realign the stack due to a register conflict. */
5290 if (DECL_CONTEXT (current_function_decl)
5291 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5293 if (ix86_force_align_arg_pointer)
5294 warning (0, "-mstackrealign ignored for nested functions");
5295 if (has_force_align_arg_pointer)
5296 error ("%s not supported for nested functions",
5297 ix86_force_align_arg_pointer_string);
5298 return virtual_incoming_args_rtx;
5300 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5301 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5304 return virtual_incoming_args_rtx;
5307 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5308 This is called from dwarf2out.c to emit call frame instructions
5309 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5311 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5313 rtx unspec = SET_SRC (pattern);
5314 gcc_assert (GET_CODE (unspec) == UNSPEC);
5318 case UNSPEC_REG_SAVE:
5319 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5320 SET_DEST (pattern));
5322 case UNSPEC_DEF_CFA:
5323 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5324 INTVAL (XVECEXP (unspec, 0, 0)));
5331 /* Expand the prologue into a bunch of separate insns. */
5334 ix86_expand_prologue (void)
5338 struct ix86_frame frame;
5339 HOST_WIDE_INT allocate;
5341 ix86_compute_frame_layout (&frame);
5343 if (cfun->machine->force_align_arg_pointer)
5347 /* Grab the argument pointer. */
5348 x = plus_constant (stack_pointer_rtx, 4);
5349 y = cfun->machine->force_align_arg_pointer;
5350 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5351 RTX_FRAME_RELATED_P (insn) = 1;
5353 /* The unwind info consists of two parts: install the fafp as the cfa,
5354 and record the fafp as the "save register" of the stack pointer.
5355 The latter is there in order that the unwinder can see where it
5356 should restore the stack pointer across the "and" insn. */
5357 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5358 x = gen_rtx_SET (VOIDmode, y, x);
5359 RTX_FRAME_RELATED_P (x) = 1;
5360 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5362 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5363 RTX_FRAME_RELATED_P (y) = 1;
5364 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5365 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5366 REG_NOTES (insn) = x;
5368 /* Align the stack. */
5369 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5372 /* And here we cheat like madmen with the unwind info. We force the
5373 cfa register back to sp+4, which is exactly what it was at the
5374 start of the function. Re-pushing the return address results in
5375 the return at the same spot relative to the cfa, and thus is
5376 correct wrt the unwind info. */
5377 x = cfun->machine->force_align_arg_pointer;
5378 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5379 insn = emit_insn (gen_push (x));
5380 RTX_FRAME_RELATED_P (insn) = 1;
5383 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5384 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5385 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5386 REG_NOTES (insn) = x;
5389 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5390 slower on all targets. Also sdb doesn't like it. */
5392 if (frame_pointer_needed)
5394 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5395 RTX_FRAME_RELATED_P (insn) = 1;
5397 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5398 RTX_FRAME_RELATED_P (insn) = 1;
5401 allocate = frame.to_allocate;
5403 if (!frame.save_regs_using_mov)
5404 ix86_emit_save_regs ();
5406 allocate += frame.nregs * UNITS_PER_WORD;
5408 /* When using the red zone we may start saving registers before allocating
5409 the stack frame, saving one cycle of the prologue. */
5410 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5411 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5412 : stack_pointer_rtx,
5413 -frame.nregs * UNITS_PER_WORD);
5417 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5418 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5419 GEN_INT (-allocate), -1);
5422 /* Only valid for Win32. */
5423 rtx eax = gen_rtx_REG (SImode, 0);
5424 bool eax_live = ix86_eax_live_at_start_p ();
5427 gcc_assert (!TARGET_64BIT);
5431 emit_insn (gen_push (eax));
5435 emit_move_insn (eax, GEN_INT (allocate));
5437 insn = emit_insn (gen_allocate_stack_worker (eax));
5438 RTX_FRAME_RELATED_P (insn) = 1;
5439 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5440 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5441 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5442 t, REG_NOTES (insn));
5446 if (frame_pointer_needed)
5447 t = plus_constant (hard_frame_pointer_rtx,
5450 - frame.nregs * UNITS_PER_WORD);
5452 t = plus_constant (stack_pointer_rtx, allocate);
5453 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5457 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5459 if (!frame_pointer_needed || !frame.to_allocate)
5460 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5462 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5463 -frame.nregs * UNITS_PER_WORD);
5466 pic_reg_used = false;
5467 if (pic_offset_table_rtx
5468 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5469 || current_function_profile))
5471 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5473 if (alt_pic_reg_used != INVALID_REGNUM)
5474 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5476 pic_reg_used = true;
5482 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5484 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5486 /* Even with accurate pre-reload life analysis, we can wind up
5487 deleting all references to the pic register after reload.
5488 Consider if cross-jumping unifies two sides of a branch
5489 controlled by a comparison vs the only read from a global.
5490 In which case, allow the set_got to be deleted, though we're
5491 too late to do anything about the ebx save in the prologue. */
5492 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5495 /* Prevent function calls from being scheduled before the call to mcount.
5496 In the pic_reg_used case, make sure that the got load isn't deleted. */
5497 if (current_function_profile)
5498 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
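/* A typical emitted prologue for a small 32-bit function that needs a
   frame pointer and saves %ebx is therefore (a sketch):

       push %ebp
       mov  %esp, %ebp
       push %ebx                # or "mov %ebx, disp(%esp)" when
       sub  $N, %esp            # save_regs_using_mov is in effect  */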
5501 /* Emit code to restore saved registers using MOV insns. First register
5502 is restored from POINTER + OFFSET. */
5504 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5505 int maybe_eh_return)
5508 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5510 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5511 if (ix86_save_reg (regno, maybe_eh_return))
5513 /* Ensure that adjust_address won't be forced to produce a pointer
5514 out of the range allowed by the x86-64 instruction set. */
5515 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5519 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5520 emit_move_insn (r11, GEN_INT (offset));
5521 emit_insn (gen_adddi3 (r11, r11, pointer));
5522 base_address = gen_rtx_MEM (Pmode, r11);
5525 emit_move_insn (gen_rtx_REG (Pmode, regno),
5526 adjust_address (base_address, Pmode, offset));
5527 offset += UNITS_PER_WORD;
5531 /* Restore function stack, frame, and registers. */
5534 ix86_expand_epilogue (int style)
5537 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5538 struct ix86_frame frame;
5539 HOST_WIDE_INT offset;
5541 ix86_compute_frame_layout (&frame);
5543 /* Calculate start of saved registers relative to ebp. Special care
5544 must be taken for the normal return case of a function using
5545 eh_return: the eax and edx registers are marked as saved, but not
5546 restored along this path. */
5547 offset = frame.nregs;
5548 if (current_function_calls_eh_return && style != 2)
5550 offset *= -UNITS_PER_WORD;
5552 /* If we're only restoring one register and sp is not valid, then
5553 use a move instruction to restore the register, since it's
5554 less work than reloading sp and popping the register.
5556 The default code results in a stack adjustment using an add/lea instruction,
5557 while this code results in a LEAVE instruction (or its discrete equivalent),
5558 so it is profitable in some other cases as well. Especially when there
5559 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5560 and there is exactly one register to pop. This heuristic may need some
5561 tuning in the future. */
5562 if ((!sp_valid && frame.nregs <= 1)
5563 || (TARGET_EPILOGUE_USING_MOVE
5564 && cfun->machine->use_fast_prologue_epilogue
5565 && (frame.nregs > 1 || frame.to_allocate))
5566 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5567 || (frame_pointer_needed && TARGET_USE_LEAVE
5568 && cfun->machine->use_fast_prologue_epilogue
5569 && frame.nregs == 1)
5570 || current_function_calls_eh_return)
5572 /* Restore registers. We can use ebp or esp to address the memory
5573 locations. If both are available, default to ebp, since offsets
5574 are known to be small. The only exception is esp pointing directly to the
5575 end of the block of saved registers, where we may simplify the addressing mode. */
5578 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5579 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5580 frame.to_allocate, style == 2);
5582 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5583 offset, style == 2);
5585 /* eh_return epilogues need %ecx added to the stack pointer. */
5588 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5590 if (frame_pointer_needed)
5592 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5593 tmp = plus_constant (tmp, UNITS_PER_WORD);
5594 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5596 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5597 emit_move_insn (hard_frame_pointer_rtx, tmp);
5599 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5604 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5605 tmp = plus_constant (tmp, (frame.to_allocate
5606 + frame.nregs * UNITS_PER_WORD));
5607 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5610 else if (!frame_pointer_needed)
5611 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5612 GEN_INT (frame.to_allocate
5613 + frame.nregs * UNITS_PER_WORD),
5615 /* If not an i386, mov & pop is faster than "leave". */
5616 else if (TARGET_USE_LEAVE || optimize_size
5617 || !cfun->machine->use_fast_prologue_epilogue)
5618 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5621 pro_epilogue_adjust_stack (stack_pointer_rtx,
5622 hard_frame_pointer_rtx,
5625 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5627 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5632 /* First step is to deallocate the stack frame so that we can
5633 pop the registers. */
5636 gcc_assert (frame_pointer_needed);
5637 pro_epilogue_adjust_stack (stack_pointer_rtx,
5638 hard_frame_pointer_rtx,
5639 GEN_INT (offset), style);
5641 else if (frame.to_allocate)
5642 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5643 GEN_INT (frame.to_allocate), style);
5645 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5646 if (ix86_save_reg (regno, false))
5649 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5651 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5653 if (frame_pointer_needed)
5655 /* Leave results in shorter dependency chains on CPUs that are
5656 able to grok it fast. */
5657 if (TARGET_USE_LEAVE)
5658 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5659 else if (TARGET_64BIT)
5660 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5662 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5666 if (cfun->machine->force_align_arg_pointer)
5668 emit_insn (gen_addsi3 (stack_pointer_rtx,
5669 cfun->machine->force_align_arg_pointer,
5673 /* Sibcall epilogues don't want a return instruction. */
5677 if (current_function_pops_args && current_function_args_size)
5679 rtx popc = GEN_INT (current_function_pops_args);
5681 /* i386 can only pop 64K bytes. If asked to pop more, pop
5682 return address, do explicit add, and jump indirectly to the caller. */
5685 if (current_function_pops_args >= 65536)
5687 rtx ecx = gen_rtx_REG (SImode, 2);
5689 /* There is no "pascal" calling convention in 64bit ABI. */
5690 gcc_assert (!TARGET_64BIT);
5692 emit_insn (gen_popsi1 (ecx));
5693 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5694 emit_jump_insn (gen_return_indirect_internal (ecx));
5697 emit_jump_insn (gen_return_pop_internal (popc));
5700 emit_jump_insn (gen_return_internal ());
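/* E.g. a stdcall function popping 8 bytes of arguments ends in "ret $8",
   while one popping 64K or more instead ends in (a sketch):

       popl %ecx
       addl $N, %esp
       jmp  *%ecx  */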
5703 /* Reset from the function's potential modifications. */
5706 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5707 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5709 if (pic_offset_table_rtx)
5710 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5712 /* Mach-O doesn't support labels at the end of objects, so if
5713 it looks like we might want one, insert a NOP. */
5715 rtx insn = get_last_insn ();
5718 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5719 insn = PREV_INSN (insn);
5723 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5724 fputs ("\tnop\n", file);
5730 /* Extract the parts of an RTL expression that is a valid memory address
5731 for an instruction. Return 0 if the structure of the address is
5732 grossly off. Return -1 if the address contains ASHIFT, so it is not
5733 strictly valid, but is still used for computing the length of the lea instruction. */
5736 ix86_decompose_address (rtx addr, struct ix86_address *out)
5738 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5739 rtx base_reg, index_reg;
5740 HOST_WIDE_INT scale = 1;
5741 rtx scale_rtx = NULL_RTX;
5743 enum ix86_address_seg seg = SEG_DEFAULT;
5745 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5747 else if (GET_CODE (addr) == PLUS)
5757 addends[n++] = XEXP (op, 1);
5760 while (GET_CODE (op) == PLUS);
5765 for (i = n; i >= 0; --i)
5768 switch (GET_CODE (op))
5773 index = XEXP (op, 0);
5774 scale_rtx = XEXP (op, 1);
5778 if (XINT (op, 1) == UNSPEC_TP
5779 && TARGET_TLS_DIRECT_SEG_REFS
5780 && seg == SEG_DEFAULT)
5781 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5810 else if (GET_CODE (addr) == MULT)
5812 index = XEXP (addr, 0); /* index*scale */
5813 scale_rtx = XEXP (addr, 1);
5815 else if (GET_CODE (addr) == ASHIFT)
5819 /* We're called for lea too, which implements ashift on occasion. */
5820 index = XEXP (addr, 0);
5821 tmp = XEXP (addr, 1);
5822 if (GET_CODE (tmp) != CONST_INT)
5824 scale = INTVAL (tmp);
5825 if ((unsigned HOST_WIDE_INT) scale > 3)
5831 disp = addr; /* displacement */
5833 /* Extract the integral value of scale. */
5836 if (GET_CODE (scale_rtx) != CONST_INT)
5838 scale = INTVAL (scale_rtx);
5841 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5842 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5844 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5845 if (base_reg && index_reg && scale == 1
5846 && (index_reg == arg_pointer_rtx
5847 || index_reg == frame_pointer_rtx
5848 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5851 tmp = base, base = index, index = tmp;
5852 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5855 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5856 if ((base_reg == hard_frame_pointer_rtx
5857 || base_reg == frame_pointer_rtx
5858 || base_reg == arg_pointer_rtx) && !disp)
5861 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
5862 Avoid this by transforming to [%esi+0]. */
5863 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5864 && base_reg && !index_reg && !disp
5866 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5869 /* Special case: encode reg+reg instead of reg*2. */
5870 if (!base && index && scale && scale == 2)
5871 base = index, base_reg = index_reg, scale = 1;
5873 /* Special case: scaling cannot be encoded without base or displacement. */
5874 if (!base && !disp && index && scale != 1)
5886 /* Return the cost of the memory address x.
5887 For i386, it is better to use a complex address than let gcc copy
5888 the address into a reg and make a new pseudo. But not if the address
5889 requires two regs - that would mean more pseudos with longer lifetimes. */
5892 ix86_address_cost (rtx x)
5894 struct ix86_address parts;
5896 int ok = ix86_decompose_address (x, &parts);
5900 if (parts.base && GET_CODE (parts.base) == SUBREG)
5901 parts.base = SUBREG_REG (parts.base);
5902 if (parts.index && GET_CODE (parts.index) == SUBREG)
5903 parts.index = SUBREG_REG (parts.index);
5905 /* More complex memory references are better. */
5906 if (parts.disp && parts.disp != const0_rtx)
5908 if (parts.seg != SEG_DEFAULT)
5911 /* Attempt to minimize the number of registers in the address. */
5913 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5915 && (!REG_P (parts.index)
5916 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5920 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5922 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5923 && parts.base != parts.index)
5926 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5927 since its predecode logic can't detect the length of such instructions
5928 and they degenerate to vector decoding. Increase the cost of such
5929 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5930 to split such addresses or even refuse them entirely.
5932 The following addressing modes are affected:
5937 The first and last cases may be avoidable by explicitly coding the zero
5938 into the memory address, but I don't have an AMD-K6 machine handy to check this theory.
5942 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5943 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5944 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5950 /* If X is a machine specific address (i.e. a symbol or label being
5951 referenced as a displacement from the GOT implemented using an
5952 UNSPEC), then return the base term. Otherwise return X. */
5955 ix86_find_base_term (rtx x)
5961 if (GET_CODE (x) != CONST)
5964 if (GET_CODE (term) == PLUS
5965 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5966 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5967 term = XEXP (term, 0);
5968 if (GET_CODE (term) != UNSPEC
5969 || XINT (term, 1) != UNSPEC_GOTPCREL)
5972 term = XVECEXP (term, 0, 0);
5974 if (GET_CODE (term) != SYMBOL_REF
5975 && GET_CODE (term) != LABEL_REF)
5981 term = ix86_delegitimize_address (x);
5983 if (GET_CODE (term) != SYMBOL_REF
5984 && GET_CODE (term) != LABEL_REF)
5990 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5991 this is used to form addresses to local data when -fPIC is in effect. */
5995 darwin_local_data_pic (rtx disp)
5997 if (GET_CODE (disp) == MINUS)
5999 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6000 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6001 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6003 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6004 if (! strcmp (sym_name, "<pic base>"))
6012 /* Determine if a given RTX is a valid constant. We already know this
6013 satisfies CONSTANT_P. */
6016 legitimate_constant_p (rtx x)
6018 switch (GET_CODE (x))
6023 if (GET_CODE (x) == PLUS)
6025 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6030 if (TARGET_MACHO && darwin_local_data_pic (x))
6033 /* Only some unspecs are valid as "constants". */
6034 if (GET_CODE (x) == UNSPEC)
6035 switch (XINT (x, 1))
6038 return TARGET_64BIT;
6041 x = XVECEXP (x, 0, 0);
6042 return (GET_CODE (x) == SYMBOL_REF
6043 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6045 x = XVECEXP (x, 0, 0);
6046 return (GET_CODE (x) == SYMBOL_REF
6047 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6052 /* We must have drilled down to a symbol. */
6053 if (GET_CODE (x) == LABEL_REF)
6055 if (GET_CODE (x) != SYMBOL_REF)
6060 /* TLS symbols are never valid. */
6061 if (SYMBOL_REF_TLS_MODEL (x))
6066 if (GET_MODE (x) == TImode
6067 && x != CONST0_RTX (TImode)
6073 if (x == CONST0_RTX (GET_MODE (x)))
6081 /* Otherwise we handle everything else in the move patterns. */
6085 /* Determine if it's legal to put X into the constant pool. This
6086 is not possible for the address of thread-local symbols, which
6087 is checked above. */
6090 ix86_cannot_force_const_mem (rtx x)
6092 /* We can always put integral constants and vectors in memory. */
6093 switch (GET_CODE (x))
6103 return !legitimate_constant_p (x);
6106 /* Determine if a given RTX is a valid constant address. */
6109 constant_address_p (rtx x)
6111 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6114 /* Nonzero if the constant value X is a legitimate general operand
6115 when generating PIC code. It is given that flag_pic is on and
6116 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6119 legitimate_pic_operand_p (rtx x)
6123 switch (GET_CODE (x))
6126 inner = XEXP (x, 0);
6127 if (GET_CODE (inner) == PLUS
6128 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6129 inner = XEXP (inner, 0);
6131 /* Only some unspecs are valid as "constants". */
6132 if (GET_CODE (inner) == UNSPEC)
6133 switch (XINT (inner, 1))
6136 return TARGET_64BIT;
6138 x = XVECEXP (inner, 0, 0);
6139 return (GET_CODE (x) == SYMBOL_REF
6140 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6148 return legitimate_pic_address_disp_p (x);
6155 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
6159 legitimate_pic_address_disp_p (rtx disp)
6163 /* In 64bit mode we can allow direct addresses of symbols and labels
6164 when they are not dynamic symbols. */
6167 rtx op0 = disp, op1;
6169 switch (GET_CODE (disp))
6175 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6177 op0 = XEXP (XEXP (disp, 0), 0);
6178 op1 = XEXP (XEXP (disp, 0), 1);
6179 if (GET_CODE (op1) != CONST_INT
6180 || INTVAL (op1) >= 16*1024*1024
6181 || INTVAL (op1) < -16*1024*1024)
6183 if (GET_CODE (op0) == LABEL_REF)
6185 if (GET_CODE (op0) != SYMBOL_REF)
6190 /* TLS references should always be enclosed in UNSPEC. */
6191 if (SYMBOL_REF_TLS_MODEL (op0))
6193 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6201 if (GET_CODE (disp) != CONST)
6203 disp = XEXP (disp, 0);
6207 /* It is unsafe to allow PLUS expressions here; refusing them also limits
6208 the allowed distance into GOT tables. We should not need these anyway. */
6209 if (GET_CODE (disp) != UNSPEC
6210 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6211 && XINT (disp, 1) != UNSPEC_GOTOFF))
6214 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6215 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6221 if (GET_CODE (disp) == PLUS)
6223 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6225 disp = XEXP (disp, 0);
6229 if (TARGET_MACHO && darwin_local_data_pic (disp))
6232 if (GET_CODE (disp) != UNSPEC)
6235 switch (XINT (disp, 1))
6240 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6242 /* Refuse GOTOFF in 64bit mode since it is always 64 bits wide when used.
6243 While the ABI also specifies a 32bit relocation, we don't produce it in
6244 the small PIC model at all. */
6245 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6246 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6248 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6250 case UNSPEC_GOTTPOFF:
6251 case UNSPEC_GOTNTPOFF:
6252 case UNSPEC_INDNTPOFF:
6255 disp = XVECEXP (disp, 0, 0);
6256 return (GET_CODE (disp) == SYMBOL_REF
6257 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6259 disp = XVECEXP (disp, 0, 0);
6260 return (GET_CODE (disp) == SYMBOL_REF
6261 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6263 disp = XVECEXP (disp, 0, 0);
6264 return (GET_CODE (disp) == SYMBOL_REF
6265 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6271 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6272 memory address for an instruction. The MODE argument is the machine mode
6273 for the MEM expression that wants to use this address.
6275 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6276 convert common non-canonical forms to canonical form so that they will be recognized. */
6280 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6282 struct ix86_address parts;
6283 rtx base, index, disp;
6284 HOST_WIDE_INT scale;
6285 const char *reason = NULL;
6286 rtx reason_rtx = NULL_RTX;
6288 if (TARGET_DEBUG_ADDR)
6291 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6292 GET_MODE_NAME (mode), strict);
6296 if (ix86_decompose_address (addr, &parts) <= 0)
6298 reason = "decomposition failed";
6303 index = parts.index;
6305 scale = parts.scale;
6307 /* Validate base register.
6309 Don't allow SUBREGs that span more than a word here; they can lead to spill
6310 failures when the base is one word out of a two-word structure, which is
6311 represented internally as a DImode int. */
6320 else if (GET_CODE (base) == SUBREG
6321 && REG_P (SUBREG_REG (base))
6322 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6324 reg = SUBREG_REG (base);
6327 reason = "base is not a register";
6331 if (GET_MODE (base) != Pmode)
6333 reason = "base is not in Pmode";
6337 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6338 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6340 reason = "base is not valid";
6345 /* Validate index register.
6347 Don't allow SUBREGs that span more than a word here -- same as above. */
6356 else if (GET_CODE (index) == SUBREG
6357 && REG_P (SUBREG_REG (index))
6358 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6360 reg = SUBREG_REG (index);
6363 reason = "index is not a register";
6367 if (GET_MODE (index) != Pmode)
6369 reason = "index is not in Pmode";
6373 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6374 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6376 reason = "index is not valid";
6381 /* Validate scale factor. */
6384 reason_rtx = GEN_INT (scale);
6387 reason = "scale without index";
6391 if (scale != 2 && scale != 4 && scale != 8)
6393 reason = "scale is not a valid multiplier";
6398 /* Validate displacement. */
6403 if (GET_CODE (disp) == CONST
6404 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6405 switch (XINT (XEXP (disp, 0), 1))
6407 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64 bits wide
6408 when used. While the ABI also specifies 32bit relocations, we don't
6409 produce them at all and use IP-relative addressing instead. */
6412 gcc_assert (flag_pic);
6414 goto is_legitimate_pic;
6415 reason = "64bit address unspec";
6418 case UNSPEC_GOTPCREL:
6419 gcc_assert (flag_pic);
6420 goto is_legitimate_pic;
6422 case UNSPEC_GOTTPOFF:
6423 case UNSPEC_GOTNTPOFF:
6424 case UNSPEC_INDNTPOFF:
6430 reason = "invalid address unspec";
6434 else if (SYMBOLIC_CONST (disp)
6438 && MACHOPIC_INDIRECT
6439 && !machopic_operand_p (disp)
6445 if (TARGET_64BIT && (index || base))
6447 /* foo@dtpoff(%rX) is ok. */
6448 if (GET_CODE (disp) != CONST
6449 || GET_CODE (XEXP (disp, 0)) != PLUS
6450 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6451 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6452 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6453 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6455 reason = "non-constant pic memory reference";
6459 else if (! legitimate_pic_address_disp_p (disp))
6461 reason = "displacement is an invalid pic construct";
6465 /* This code used to verify that a symbolic pic displacement
6466 includes the pic_offset_table_rtx register.
6468 While this is a good idea, unfortunately these constructs may
6469 be created by the "adds using lea" optimization for incorrect code.
6478 The resulting code is nonsensical, but results in addressing the
6479 GOT table with a pic_offset_table_rtx base. We can't
6480 just refuse it easily, since it gets matched by the
6481 "addsi3" pattern, which later gets split to lea in the
6482 case the output register differs from the input. While this
6483 could be handled by a separate addsi pattern for this case
6484 that never results in lea, disabling this test seems to be the
6485 easier and correct fix for the crash. */
6487 else if (GET_CODE (disp) != LABEL_REF
6488 && GET_CODE (disp) != CONST_INT
6489 && (GET_CODE (disp) != CONST
6490 || !legitimate_constant_p (disp))
6491 && (GET_CODE (disp) != SYMBOL_REF
6492 || !legitimate_constant_p (disp)))
6494 reason = "displacement is not constant";
6497 else if (TARGET_64BIT
6498 && !x86_64_immediate_operand (disp, VOIDmode))
6500 reason = "displacement is out of range";
6505 /* Everything looks valid. */
6506 if (TARGET_DEBUG_ADDR)
6507 fprintf (stderr, "Success.\n");
6511 if (TARGET_DEBUG_ADDR)
6513 fprintf (stderr, "Error: %s\n", reason);
6514 debug_rtx (reason_rtx);
6519 /* Return a unique alias set for the GOT. */
6521 static HOST_WIDE_INT
6522 ix86_GOT_alias_set (void)
6524 static HOST_WIDE_INT set = -1;
6526 set = new_alias_set ();
6530 /* Return a legitimate reference for ORIG (an address) using the
6531 register REG. If REG is 0, a new pseudo is generated.
6533 There are two types of references that must be handled:
6535 1. Global data references must load the address from the GOT, via
6536 the PIC reg. An insn is emitted to do this load, and the reg is
6539 2. Static data references, constant pool addresses, and code labels
6540 compute the address as an offset from the GOT, whose base is in
6541 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6542 differentiate them from global data objects. The returned
6543 address is the PIC reg + an unspec constant.
6545 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6546 reg also appears in the address. */
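/* To illustrate the two cases above for 32-bit PIC (a sketch, assuming
   %ebx holds the PIC register): a global symbol is reached through a
   GOT load such as

	movl	foo@GOT(%ebx), %eax

   while a local symbol is formed as an offset from the GOT base:

	leal	bar@GOTOFF(%ebx), %eax  */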
6549 legitimize_pic_address (rtx orig, rtx reg)
6556 if (TARGET_MACHO && !TARGET_64BIT)
6559 reg = gen_reg_rtx (Pmode);
6560 /* Use the generic Mach-O PIC machinery. */
6561 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6565 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6567 else if (TARGET_64BIT
6568 && ix86_cmodel != CM_SMALL_PIC
6569 && local_symbolic_operand (addr, Pmode))
6572 /* This symbol may be referenced via a displacement from the PIC
6573 base address (@GOTOFF). */
6575 if (reload_in_progress)
6576 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6577 if (GET_CODE (addr) == CONST)
6578 addr = XEXP (addr, 0);
6579 if (GET_CODE (addr) == PLUS)
6581 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6582 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6585 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6586 new = gen_rtx_CONST (Pmode, new);
6588 tmpreg = gen_reg_rtx (Pmode);
6591 emit_move_insn (tmpreg, new);
6595 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6596 tmpreg, 1, OPTAB_DIRECT);
6599 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6601 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6603 /* This symbol may be referenced via a displacement from the PIC
6604 base address (@GOTOFF). */
6606 if (reload_in_progress)
6607 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6608 if (GET_CODE (addr) == CONST)
6609 addr = XEXP (addr, 0);
6610 if (GET_CODE (addr) == PLUS)
6612 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6613 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6616 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6617 new = gen_rtx_CONST (Pmode, new);
6618 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6622 emit_move_insn (reg, new);
6626 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6630 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6631 new = gen_rtx_CONST (Pmode, new);
6632 new = gen_const_mem (Pmode, new);
6633 set_mem_alias_set (new, ix86_GOT_alias_set ());
6636 reg = gen_reg_rtx (Pmode);
6637 /* Use gen_movsi directly; otherwise the address is loaded
6638 into a register for CSE. We don't want to CSE these addresses;
6639 instead we CSE addresses from the GOT table, so skip this. */
6640 emit_insn (gen_movsi (reg, new));
6645 /* This symbol must be referenced via a load from the
6646 Global Offset Table (@GOT). */
6648 if (reload_in_progress)
6649 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6650 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6651 new = gen_rtx_CONST (Pmode, new);
6652 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6653 new = gen_const_mem (Pmode, new);
6654 set_mem_alias_set (new, ix86_GOT_alias_set ());
6657 reg = gen_reg_rtx (Pmode);
6658 emit_move_insn (reg, new);
6664 if (GET_CODE (addr) == CONST_INT
6665 && !x86_64_immediate_operand (addr, VOIDmode))
6669 emit_move_insn (reg, addr);
6673 new = force_reg (Pmode, addr);
6675 else if (GET_CODE (addr) == CONST)
6677 addr = XEXP (addr, 0);
6679 /* We must match the stuff we generated earlier. Assume the only
6680 unspecs that can get here are ours. Not that we could do
6681 anything with them anyway.... */
6682 if (GET_CODE (addr) == UNSPEC
6683 || (GET_CODE (addr) == PLUS
6684 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6686 gcc_assert (GET_CODE (addr) == PLUS);
6688 if (GET_CODE (addr) == PLUS)
6690 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6692 /* Check first to see if this is a constant offset from a @GOTOFF
6693 symbol reference. */
6694 if (local_symbolic_operand (op0, Pmode)
6695 && GET_CODE (op1) == CONST_INT)
6699 if (reload_in_progress)
6700 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6701 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6703 new = gen_rtx_PLUS (Pmode, new, op1);
6704 new = gen_rtx_CONST (Pmode, new);
6705 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6709 emit_move_insn (reg, new);
6715 if (INTVAL (op1) < -16*1024*1024
6716 || INTVAL (op1) >= 16*1024*1024)
6718 if (!x86_64_immediate_operand (op1, Pmode))
6719 op1 = force_reg (Pmode, op1);
6720 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6726 base = legitimize_pic_address (XEXP (addr, 0), reg);
6727 new = legitimize_pic_address (XEXP (addr, 1),
6728 base == reg ? NULL_RTX : reg);
6730 if (GET_CODE (new) == CONST_INT)
6731 new = plus_constant (base, INTVAL (new));
6734 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6736 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6737 new = XEXP (new, 1);
6739 new = gen_rtx_PLUS (Pmode, base, new);
6747 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6750 get_thread_pointer (int to_reg)
6754 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6758 reg = gen_reg_rtx (Pmode);
6759 insn = gen_rtx_SET (VOIDmode, reg, tp);
6760 insn = emit_insn (insn);
6765 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6766 false if we expect this to be used for a memory address and true if
6767 we expect to load the address into a register. */
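/* Rough correspondence between the models handled below and the code
   they produce (a sketch for GNU TLS; exact sequences vary by target):
   the global- and local-dynamic models call __tls_get_addr, initial
   exec loads the offset from the GOT (@GOTTPOFF/@GOTNTPOFF) and
   combines it with the thread pointer, and local exec folds the
   offset in directly (@TPOFF/@NTPOFF).  */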
6770 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6772 rtx dest, base, off, pic, tp;
6777 case TLS_MODEL_GLOBAL_DYNAMIC:
6778 dest = gen_reg_rtx (Pmode);
6779 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6781 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6783 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6786 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6787 insns = get_insns ();
6790 emit_libcall_block (insns, dest, rax, x);
6792 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6793 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6795 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6797 if (TARGET_GNU2_TLS)
6799 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6801 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6805 case TLS_MODEL_LOCAL_DYNAMIC:
6806 base = gen_reg_rtx (Pmode);
6807 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6809 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6811 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6814 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6815 insns = get_insns ();
6818 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6819 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6820 emit_libcall_block (insns, base, rax, note);
6822 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6823 emit_insn (gen_tls_local_dynamic_base_64 (base));
6825 emit_insn (gen_tls_local_dynamic_base_32 (base));
6827 if (TARGET_GNU2_TLS)
6829 rtx x = ix86_tls_module_base ();
6831 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6832 gen_rtx_MINUS (Pmode, x, tp));
6835 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6836 off = gen_rtx_CONST (Pmode, off);
6838 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6840 if (TARGET_GNU2_TLS)
6842 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6844 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6849 case TLS_MODEL_INITIAL_EXEC:
6853 type = UNSPEC_GOTNTPOFF;
6857 if (reload_in_progress)
6858 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6859 pic = pic_offset_table_rtx;
6860 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6862 else if (!TARGET_ANY_GNU_TLS)
6864 pic = gen_reg_rtx (Pmode);
6865 emit_insn (gen_set_got (pic));
6866 type = UNSPEC_GOTTPOFF;
6871 type = UNSPEC_INDNTPOFF;
6874 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6875 off = gen_rtx_CONST (Pmode, off);
6877 off = gen_rtx_PLUS (Pmode, pic, off);
6878 off = gen_const_mem (Pmode, off);
6879 set_mem_alias_set (off, ix86_GOT_alias_set ());
6881 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6883 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6884 off = force_reg (Pmode, off);
6885 return gen_rtx_PLUS (Pmode, base, off);
6889 base = get_thread_pointer (true);
6890 dest = gen_reg_rtx (Pmode);
6891 emit_insn (gen_subsi3 (dest, base, off));
6895 case TLS_MODEL_LOCAL_EXEC:
6896 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6897 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6898 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6899 off = gen_rtx_CONST (Pmode, off);
6901 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6903 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6904 return gen_rtx_PLUS (Pmode, base, off);
6908 base = get_thread_pointer (true);
6909 dest = gen_reg_rtx (Pmode);
6910 emit_insn (gen_subsi3 (dest, base, off));
6921 /* Try machine-dependent ways of modifying an illegitimate address
6922 to be legitimate. If we find one, return the new, valid address.
6923 This macro is used in only one place: `memory_address' in explow.c.
6925 OLDX is the address as it was before break_out_memory_refs was called.
6926 In some cases it is useful to look at this to decide what needs to be done.
6928 MODE and WIN are passed so that this macro can use
6929 GO_IF_LEGITIMATE_ADDRESS.
6931 It is always safe for this macro to do nothing. It exists to recognize
6932 opportunities to optimize the output.
6934 For the 80386, we handle X+REG by loading X into a register R and
6935 using R+REG. R will go in a general reg and indexing will be used.
6936 However, if REG is a broken-out memory address or multiplication,
6937 nothing needs to be done because REG can certainly go in a general reg.
6939 When -fpic is used, special handling is needed for symbolic references.
6940 See comments by legitimize_pic_address in i386.c for details. */
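/* For instance (a hypothetical input), the non-canonical address
   (plus (ashift (reg) (const_int 2)) (reg)) is rewritten below as
   (plus (mult (reg) (const_int 4)) (reg)), so it matches the
   canonical scaled-index form.  */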
6943 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6948 if (TARGET_DEBUG_ADDR)
6950 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6951 GET_MODE_NAME (mode));
6955 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6957 return legitimize_tls_address (x, log, false);
6958 if (GET_CODE (x) == CONST
6959 && GET_CODE (XEXP (x, 0)) == PLUS
6960 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6961 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6963 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6964 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6967 if (flag_pic && SYMBOLIC_CONST (x))
6968 return legitimize_pic_address (x, 0);
6970 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6971 if (GET_CODE (x) == ASHIFT
6972 && GET_CODE (XEXP (x, 1)) == CONST_INT
6973 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6976 log = INTVAL (XEXP (x, 1));
6977 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6978 GEN_INT (1 << log));
6981 if (GET_CODE (x) == PLUS)
6983 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6985 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6986 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6987 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6990 log = INTVAL (XEXP (XEXP (x, 0), 1));
6991 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6992 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6993 GEN_INT (1 << log));
6996 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6997 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6998 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7001 log = INTVAL (XEXP (XEXP (x, 1), 1));
7002 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7003 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7004 GEN_INT (1 << log));
7007 /* Put multiply first if it isn't already. */
7008 if (GET_CODE (XEXP (x, 1)) == MULT)
7010 rtx tmp = XEXP (x, 0);
7011 XEXP (x, 0) = XEXP (x, 1);
7016 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7017 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7018 created by virtual register instantiation, register elimination, and
7019 similar optimizations. */
7020 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7023 x = gen_rtx_PLUS (Pmode,
7024 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7025 XEXP (XEXP (x, 1), 0)),
7026 XEXP (XEXP (x, 1), 1));
7030 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7031 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7032 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7033 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7034 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7035 && CONSTANT_P (XEXP (x, 1)))
7038 rtx other = NULL_RTX;
7040 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7042 constant = XEXP (x, 1);
7043 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7045 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7047 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7048 other = XEXP (x, 1);
7056 x = gen_rtx_PLUS (Pmode,
7057 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7058 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7059 plus_constant (other, INTVAL (constant)));
7063 if (changed && legitimate_address_p (mode, x, FALSE))
7066 if (GET_CODE (XEXP (x, 0)) == MULT)
7069 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7072 if (GET_CODE (XEXP (x, 1)) == MULT)
7075 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7079 && GET_CODE (XEXP (x, 1)) == REG
7080 && GET_CODE (XEXP (x, 0)) == REG)
7083 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7086 x = legitimize_pic_address (x, 0);
7089 if (changed && legitimate_address_p (mode, x, FALSE))
7092 if (GET_CODE (XEXP (x, 0)) == REG)
7094 rtx temp = gen_reg_rtx (Pmode);
7095 rtx val = force_operand (XEXP (x, 1), temp);
7097 emit_move_insn (temp, val);
7103 else if (GET_CODE (XEXP (x, 1)) == REG)
7105 rtx temp = gen_reg_rtx (Pmode);
7106 rtx val = force_operand (XEXP (x, 0), temp);
7108 emit_move_insn (temp, val);
7118 /* Print an integer constant expression in assembler syntax. Addition
7119 and subtraction are the only arithmetic that may appear in these
7120 expressions. FILE is the stdio stream to write to, X is the rtx, and
7121 CODE is the operand print code from the output string. */
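/* For example, an operand of the form (const (unspec [foo] UNSPEC_GOTOFF))
   is printed as "foo@GOTOFF", per the UNSPEC switch below.  */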
7124 output_pic_addr_const (FILE *file, rtx x, int code)
7128 switch (GET_CODE (x))
7131 gcc_assert (flag_pic);
7136 output_addr_const (file, x);
7137 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7138 fputs ("@PLT", file);
7145 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7146 assemble_name (asm_out_file, buf);
7150 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7154 /* This used to output parentheses around the expression,
7155 but that does not work on the 386 (either ATT or BSD assembler). */
7156 output_pic_addr_const (file, XEXP (x, 0), code);
7160 if (GET_MODE (x) == VOIDmode)
7162 /* We can use %d if the number is <32 bits and positive. */
7163 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7164 fprintf (file, "0x%lx%08lx",
7165 (unsigned long) CONST_DOUBLE_HIGH (x),
7166 (unsigned long) CONST_DOUBLE_LOW (x));
7168 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7171 /* We can't handle floating point constants;
7172 PRINT_OPERAND must handle them. */
7173 output_operand_lossage ("floating constant misused");
7177 /* Some assemblers need integer constants to appear first. */
7178 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7180 output_pic_addr_const (file, XEXP (x, 0), code);
7182 output_pic_addr_const (file, XEXP (x, 1), code);
7186 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7187 output_pic_addr_const (file, XEXP (x, 1), code);
7189 output_pic_addr_const (file, XEXP (x, 0), code);
7195 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7196 output_pic_addr_const (file, XEXP (x, 0), code);
7198 output_pic_addr_const (file, XEXP (x, 1), code);
7200 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7204 gcc_assert (XVECLEN (x, 0) == 1);
7205 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7206 switch (XINT (x, 1))
7209 fputs ("@GOT", file);
7212 fputs ("@GOTOFF", file);
7214 case UNSPEC_GOTPCREL:
7215 fputs ("@GOTPCREL(%rip)", file);
7217 case UNSPEC_GOTTPOFF:
7218 /* FIXME: This might be @TPOFF in Sun ld too. */
7219 fputs ("@GOTTPOFF", file);
7222 fputs ("@TPOFF", file);
7226 fputs ("@TPOFF", file);
7228 fputs ("@NTPOFF", file);
7231 fputs ("@DTPOFF", file);
7233 case UNSPEC_GOTNTPOFF:
7235 fputs ("@GOTTPOFF(%rip)", file);
7237 fputs ("@GOTNTPOFF", file);
7239 case UNSPEC_INDNTPOFF:
7240 fputs ("@INDNTPOFF", file);
7243 output_operand_lossage ("invalid UNSPEC as operand");
7249 output_operand_lossage ("invalid expression as operand");
7253 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7254 We need to emit DTP-relative relocations. */
7257 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7259 fputs (ASM_LONG, file);
7260 output_addr_const (file, x);
7261 fputs ("@DTPOFF", file);
7267 fputs (", 0", file);
7274 /* In the name of slightly smaller debug output, and to cater to
7275 general assembler lossage, recognize PIC+GOTOFF and turn it back
7276 into a direct symbol reference.
7278 On Darwin, this is necessary to avoid a crash, because Darwin
7279 has a different PIC label for each routine but the DWARF debugging
7280 information is not associated with any particular routine, so it's
7281 necessary to remove references to the PIC label from RTL stored by
7282 the DWARF output code. */
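/* A sketch of the transformation: (mem (plus (reg %ebx)
   (const (unspec [foo] UNSPEC_GOT)))) delegitimizes back to plain
   "foo", and similarly for the GOTOFF and 64-bit GOTPCREL forms.  */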
7285 ix86_delegitimize_address (rtx orig_x)
7288 /* reg_addend is NULL or a multiple of some register. */
7289 rtx reg_addend = NULL_RTX;
7290 /* const_addend is NULL or a const_int. */
7291 rtx const_addend = NULL_RTX;
7292 /* This is the result, or NULL. */
7293 rtx result = NULL_RTX;
7295 if (GET_CODE (x) == MEM)
7300 if (GET_CODE (x) != CONST
7301 || GET_CODE (XEXP (x, 0)) != UNSPEC
7302 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7303 || GET_CODE (orig_x) != MEM)
7305 return XVECEXP (XEXP (x, 0), 0, 0);
7308 if (GET_CODE (x) != PLUS
7309 || GET_CODE (XEXP (x, 1)) != CONST)
7312 if (GET_CODE (XEXP (x, 0)) == REG
7313 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7314 /* %ebx + GOT/GOTOFF */
7316 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7318 /* %ebx + %reg * scale + GOT/GOTOFF */
7319 reg_addend = XEXP (x, 0);
7320 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7321 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7322 reg_addend = XEXP (reg_addend, 1);
7323 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7324 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7325 reg_addend = XEXP (reg_addend, 0);
7328 if (GET_CODE (reg_addend) != REG
7329 && GET_CODE (reg_addend) != MULT
7330 && GET_CODE (reg_addend) != ASHIFT)
7336 x = XEXP (XEXP (x, 1), 0);
7337 if (GET_CODE (x) == PLUS
7338 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7340 const_addend = XEXP (x, 1);
7344 if (GET_CODE (x) == UNSPEC
7345 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7346 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7347 result = XVECEXP (x, 0, 0);
7349 if (TARGET_MACHO && darwin_local_data_pic (x)
7350 && GET_CODE (orig_x) != MEM)
7351 result = XEXP (x, 0);
7357 result = gen_rtx_PLUS (Pmode, result, const_addend);
7359 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7364 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7369 if (mode == CCFPmode || mode == CCFPUmode)
7371 enum rtx_code second_code, bypass_code;
7372 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7373 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7374 code = ix86_fp_compare_code_to_integer (code);
7378 code = reverse_condition (code);
7389 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7393 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7394 Those same assemblers have the same but opposite lossage on cmov. */
7395 gcc_assert (mode == CCmode);
7396 suffix = fp ? "nbe" : "a";
7416 gcc_assert (mode == CCmode);
7438 gcc_assert (mode == CCmode);
7439 suffix = fp ? "nb" : "ae";
7442 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7446 gcc_assert (mode == CCmode);
7450 suffix = fp ? "u" : "p";
7453 suffix = fp ? "nu" : "np";
7458 fputs (suffix, file);
7461 /* Print the name of register X to FILE based on its machine mode and number.
7462 If CODE is 'w', pretend the mode is HImode.
7463 If CODE is 'b', pretend the mode is QImode.
7464 If CODE is 'k', pretend the mode is SImode.
7465 If CODE is 'q', pretend the mode is DImode.
7466 If CODE is 'h', pretend the reg is the 'high' byte register.
7467 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
7470 print_reg (rtx x, int code, FILE *file)
7472 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7473 && REGNO (x) != FRAME_POINTER_REGNUM
7474 && REGNO (x) != FLAGS_REG
7475 && REGNO (x) != FPSR_REG);
7477 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7480 if (code == 'w' || MMX_REG_P (x))
7482 else if (code == 'b')
7484 else if (code == 'k')
7486 else if (code == 'q')
7488 else if (code == 'y')
7490 else if (code == 'h')
7493 code = GET_MODE_SIZE (GET_MODE (x));
7495 /* Irritatingly, the AMD extended registers use a different naming convention
7496 from the normal registers. */
7497 if (REX_INT_REG_P (x))
7499 gcc_assert (TARGET_64BIT);
7503 error ("extended registers have no high halves");
7506 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7509 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7512 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7515 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7518 error ("unsupported operand size for extended register");
7526 if (STACK_TOP_P (x))
7528 fputs ("st(0)", file);
7535 if (! ANY_FP_REG_P (x))
7536 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7541 fputs (hi_reg_name[REGNO (x)], file);
7544 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7546 fputs (qi_reg_name[REGNO (x)], file);
7549 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7551 fputs (qi_high_reg_name[REGNO (x)], file);
7558 /* Locate some local-dynamic symbol still in use by this function
7559 so that we can print its name in some tls_local_dynamic_base pattern. */
7563 get_some_local_dynamic_name (void)
7567 if (cfun->machine->some_ld_name)
7568 return cfun->machine->some_ld_name;
7570 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7572 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7573 return cfun->machine->some_ld_name;
7579 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7583 if (GET_CODE (x) == SYMBOL_REF
7584 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7586 cfun->machine->some_ld_name = XSTR (x, 0);
7594 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7595 C -- print opcode suffix for set/cmov insn.
7596 c -- like C, but print reversed condition
7597 F,f -- likewise, but for floating-point.
7598 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7600 R -- print the prefix for register names.
7601 z -- print the opcode suffix for the size of the current operand.
7602 * -- print a star (in certain assembler syntax)
7603 A -- print an absolute memory reference.
7604 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7605 s -- print a shift double count, followed by the assembler's argument delimiter.
7607 b -- print the QImode name of the register for the indicated operand.
7608 %b0 would print %al if operands[0] is reg 0.
7609 w -- likewise, print the HImode name of the register.
7610 k -- likewise, print the SImode name of the register.
7611 q -- likewise, print the DImode name of the register.
7612 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7613 y -- print "st(0)" instead of "st" as a register.
7614 D -- print condition for SSE cmp instruction.
7615 P -- if PIC, print an @PLT suffix.
7616 X -- don't print any sort of PIC '@' suffix for a symbol.
7617 & -- print some in-use local-dynamic symbol name.
7618 H -- print a memory address offset by 8; used for sse high-parts
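/* For example, if operands[0] is (reg:SI ax), then %k0 prints "%eax",
   %w0 prints "%ax", %b0 prints "%al", %h0 prints "%ah" and, in 64bit
   mode, %q0 prints "%rax" (AT&T syntax).  */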
7622 print_operand (FILE *file, rtx x, int code)
7629 if (ASSEMBLER_DIALECT == ASM_ATT)
7634 assemble_name (file, get_some_local_dynamic_name ());
7638 switch (ASSEMBLER_DIALECT)
7645 /* Intel syntax. For absolute addresses, registers should not
7646 be surrounded by braces. */
7647 if (GET_CODE (x) != REG)
7650 PRINT_OPERAND (file, x, 0);
7660 PRINT_OPERAND (file, x, 0);
7665 if (ASSEMBLER_DIALECT == ASM_ATT)
7670 if (ASSEMBLER_DIALECT == ASM_ATT)
7675 if (ASSEMBLER_DIALECT == ASM_ATT)
7680 if (ASSEMBLER_DIALECT == ASM_ATT)
7685 if (ASSEMBLER_DIALECT == ASM_ATT)
7690 if (ASSEMBLER_DIALECT == ASM_ATT)
7695 /* 387 opcodes don't get size suffixes if the operands are registers. */
7697 if (STACK_REG_P (x))
7700 /* Likewise if using Intel opcodes. */
7701 if (ASSEMBLER_DIALECT == ASM_INTEL)
7704 /* Derive the opcode suffix from the size of the operand. */
7705 switch (GET_MODE_SIZE (GET_MODE (x)))
7708 #ifdef HAVE_GAS_FILDS_FISTS
7714 if (GET_MODE (x) == SFmode)
7729 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7731 #ifdef GAS_MNEMONICS
7757 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7759 PRINT_OPERAND (file, x, 0);
7765 /* A little bit of brain damage here: the SSE compare instructions
7766 use completely different names for the comparisons than the
7767 fp conditional moves do. */
7768 switch (GET_CODE (x))
7783 fputs ("unord", file);
7787 fputs ("neq", file);
7791 fputs ("nlt", file);
7795 fputs ("nle", file);
7798 fputs ("ord", file);
7805 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7806 if (ASSEMBLER_DIALECT == ASM_ATT)
7808 switch (GET_MODE (x))
7810 case HImode: putc ('w', file); break;
7812 case SFmode: putc ('l', file); break;
7814 case DFmode: putc ('q', file); break;
7815 default: gcc_unreachable ();
7822 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7825 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7826 if (ASSEMBLER_DIALECT == ASM_ATT)
7829 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7832 /* Like above, but reverse condition */
7834 /* Check to see if argument to %c is really a constant
7835 and not a condition code which needs to be reversed. */
7836 if (!COMPARISON_P (x))
7838 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7841 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7844 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7845 if (ASSEMBLER_DIALECT == ASM_ATT)
7848 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7852 /* It doesn't actually matter what mode we use here, as we're
7853 only going to use this for printing. */
7854 x = adjust_address_nv (x, DImode, 8);
7861 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7864 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7867 int pred_val = INTVAL (XEXP (x, 0));
7869 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7870 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7872 int taken = pred_val > REG_BR_PROB_BASE / 2;
7873 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7875 /* Emit hints only in the case where the default branch prediction
7876 heuristics would fail. */
7877 if (taken != cputaken)
7879 /* We use 3e (DS) prefix for taken branches and
7880 2e (CS) prefix for not taken branches. */
7882 fputs ("ds ; ", file);
7884 fputs ("cs ; ", file);
7891 output_operand_lossage ("invalid operand code '%c'", code);
7895 if (GET_CODE (x) == REG)
7896 print_reg (x, code, file);
7898 else if (GET_CODE (x) == MEM)
7900 /* No `byte ptr' prefix for call instructions. */
7901 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7904 switch (GET_MODE_SIZE (GET_MODE (x)))
7906 case 1: size = "BYTE"; break;
7907 case 2: size = "WORD"; break;
7908 case 4: size = "DWORD"; break;
7909 case 8: size = "QWORD"; break;
7910 case 12: size = "XWORD"; break;
7911 case 16: size = "XMMWORD"; break;
7916 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7919 else if (code == 'w')
7921 else if (code == 'k')
7925 fputs (" PTR ", file);
7929 /* Avoid (%rip) for call operands. */
7930 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7931 && GET_CODE (x) != CONST_INT)
7932 output_addr_const (file, x);
7933 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7934 output_operand_lossage ("invalid constraints for operand");
7939 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7944 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7945 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7947 if (ASSEMBLER_DIALECT == ASM_ATT)
7949 fprintf (file, "0x%08lx", l);
7952 /* These float cases don't actually occur as immediate operands. */
7953 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7957 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7958 fprintf (file, "%s", dstr);
7961 else if (GET_CODE (x) == CONST_DOUBLE
7962 && GET_MODE (x) == XFmode)
7966 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7967 fprintf (file, "%s", dstr);
7972 /* We have patterns that allow zero sets of memory, for instance.
7973 In 64-bit mode, we should probably support all 8-byte vectors,
7974 since we can in fact encode that into an immediate. */
7975 if (GET_CODE (x) == CONST_VECTOR)
7977 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7983 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7985 if (ASSEMBLER_DIALECT == ASM_ATT)
7988 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7989 || GET_CODE (x) == LABEL_REF)
7991 if (ASSEMBLER_DIALECT == ASM_ATT)
7994 fputs ("OFFSET FLAT:", file);
7997 if (GET_CODE (x) == CONST_INT)
7998 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8000 output_pic_addr_const (file, x, code);
8002 output_addr_const (file, x);
8006 /* Print a memory operand whose address is ADDR. */
8009 print_operand_address (FILE *file, rtx addr)
8011 struct ix86_address parts;
8012 rtx base, index, disp;
8014 int ok = ix86_decompose_address (addr, &parts);
8019 index = parts.index;
8021 scale = parts.scale;
8029 if (USER_LABEL_PREFIX[0] == 0)
8031 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8037 if (!base && !index)
8039 /* A displacement-only address requires special attention. */
8041 if (GET_CODE (disp) == CONST_INT)
8043 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8045 if (USER_LABEL_PREFIX[0] == 0)
8047 fputs ("ds:", file);
8049 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8052 output_pic_addr_const (file, disp, 0);
8054 output_addr_const (file, disp);
8056 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8059 if (GET_CODE (disp) == CONST
8060 && GET_CODE (XEXP (disp, 0)) == PLUS
8061 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8062 disp = XEXP (XEXP (disp, 0), 0);
8063 if (GET_CODE (disp) == LABEL_REF
8064 || (GET_CODE (disp) == SYMBOL_REF
8065 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8066 fputs ("(%rip)", file);
8071 if (ASSEMBLER_DIALECT == ASM_ATT)
8076 output_pic_addr_const (file, disp, 0);
8077 else if (GET_CODE (disp) == LABEL_REF)
8078 output_asm_label (disp);
8080 output_addr_const (file, disp);
8085 print_reg (base, 0, file);
8089 print_reg (index, 0, file);
8091 fprintf (file, ",%d", scale);
8097 rtx offset = NULL_RTX;
8101 /* Pull out the offset of a symbol; print any symbol itself. */
8102 if (GET_CODE (disp) == CONST
8103 && GET_CODE (XEXP (disp, 0)) == PLUS
8104 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8106 offset = XEXP (XEXP (disp, 0), 1);
8107 disp = gen_rtx_CONST (VOIDmode,
8108 XEXP (XEXP (disp, 0), 0));
8112 output_pic_addr_const (file, disp, 0);
8113 else if (GET_CODE (disp) == LABEL_REF)
8114 output_asm_label (disp);
8115 else if (GET_CODE (disp) == CONST_INT)
8118 output_addr_const (file, disp);
8124 print_reg (base, 0, file);
8127 if (INTVAL (offset) >= 0)
8129 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8133 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8140 print_reg (index, 0, file);
8142 fprintf (file, "*%d", scale);
8150 output_addr_const_extra (FILE *file, rtx x)
8154 if (GET_CODE (x) != UNSPEC)
8157 op = XVECEXP (x, 0, 0);
8158 switch (XINT (x, 1))
8160 case UNSPEC_GOTTPOFF:
8161 output_addr_const (file, op);
8162 /* FIXME: This might be @TPOFF in Sun ld. */
8163 fputs ("@GOTTPOFF", file);
8166 output_addr_const (file, op);
8167 fputs ("@TPOFF", file);
8170 output_addr_const (file, op);
8172 fputs ("@TPOFF", file);
8174 fputs ("@NTPOFF", file);
8177 output_addr_const (file, op);
8178 fputs ("@DTPOFF", file);
8180 case UNSPEC_GOTNTPOFF:
8181 output_addr_const (file, op);
8183 fputs ("@GOTTPOFF(%rip)", file);
8185 fputs ("@GOTNTPOFF", file);
8187 case UNSPEC_INDNTPOFF:
8188 output_addr_const (file, op);
8189 fputs ("@INDNTPOFF", file);
8199 /* Split one or more DImode RTL references into pairs of SImode
8200 references. The RTL can be REG, offsettable MEM, integer constant, or
8201 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8202 split and "num" is its length. lo_half and hi_half are output arrays
8203 that parallel "operands". */
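/* A usage sketch (hypothetical operand): for op = (mem:DI X) the loop
   below yields lo_half = (mem:SI X) and hi_half = (mem:SI X+4),
   matching the little-endian layout with the low word at the lower
   address; a (reg:DI) is split with simplify_gen_subreg instead.  */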
8206 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8210 rtx op = operands[num];
8212 /* simplify_subreg refuses to split volatile memory references,
8213 but we still have to handle them. */
8214 if (GET_CODE (op) == MEM)
8216 lo_half[num] = adjust_address (op, SImode, 0);
8217 hi_half[num] = adjust_address (op, SImode, 4);
8221 lo_half[num] = simplify_gen_subreg (SImode, op,
8222 GET_MODE (op) == VOIDmode
8223 ? DImode : GET_MODE (op), 0);
8224 hi_half[num] = simplify_gen_subreg (SImode, op,
8225 GET_MODE (op) == VOIDmode
8226 ? DImode : GET_MODE (op), 4);
8230 /* Split one or more TImode RTL references into pairs of DImode
8231 references. The RTL can be REG, offsettable MEM, integer constant, or
8232 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8233 split and "num" is its length. lo_half and hi_half are output arrays
8234 that parallel "operands". */
8237 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8241 rtx op = operands[num];
8243 /* simplify_subreg refuses to split volatile memory references, but we
8244 still have to handle them. */
8245 if (GET_CODE (op) == MEM)
8247 lo_half[num] = adjust_address (op, DImode, 0);
8248 hi_half[num] = adjust_address (op, DImode, 8);
8252 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8253 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8258 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8259 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8260 is the expression of the binary operation. The output may either be
8261 emitted here, or returned to the caller, like all output_* functions.
8263 There is no guarantee that the operands are the same mode, as they
8264 might be within FLOAT or FLOAT_EXTEND expressions. */
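/* For example (a sketch, AT&T syntax): when operands[0] and operands[1]
   are both %st(0) and operands[2] is %st(2), the "\t{%y2, %0|%0, %y2}"
   arm below emits "fadd %st(2), %st".  */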
8266 #ifndef SYSV386_COMPAT
8267 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8268 wants to fix the assemblers because that causes incompatibility
8269 with gcc. No-one wants to fix gcc because that causes
8270 incompatibility with assemblers... You can use the option of
8271 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8272 #define SYSV386_COMPAT 1
8276 output_387_binary_op (rtx insn, rtx *operands)
8278 static char buf[30];
8281 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8283 #ifdef ENABLE_CHECKING
8284 /* Even if we do not want to check the inputs, this documents the input
8285 constraints, which helps in understanding the following code. */
8286 if (STACK_REG_P (operands[0])
8287 && ((REG_P (operands[1])
8288 && REGNO (operands[0]) == REGNO (operands[1])
8289 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8290 || (REG_P (operands[2])
8291 && REGNO (operands[0]) == REGNO (operands[2])
8292 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8293 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8296 gcc_assert (is_sse);
8299 switch (GET_CODE (operands[3]))
8302 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8303 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8311 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8312 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8320 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8321 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8329 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8330 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8344 if (GET_MODE (operands[0]) == SFmode)
8345 strcat (buf, "ss\t{%2, %0|%0, %2}");
8347 strcat (buf, "sd\t{%2, %0|%0, %2}");
8352 switch (GET_CODE (operands[3]))
8356 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8358 rtx temp = operands[2];
8359 operands[2] = operands[1];
8363 /* We know operands[0] == operands[1]. */
8365 if (GET_CODE (operands[2]) == MEM)
8371 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8373 if (STACK_TOP_P (operands[0]))
8374 /* How is it that we are storing to a dead operand[2]?
8375 Well, presumably operands[1] is dead too. We can't
8376 store the result to st(0) as st(0) gets popped on this
8377 instruction. Instead store to operands[2] (which I
8378 think has to be st(1)). st(1) will be popped later.
8379 gcc <= 2.8.1 didn't have this check and generated
8380 assembly code that the Unixware assembler rejected. */
8381 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8383 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8387 if (STACK_TOP_P (operands[0]))
8388 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8390 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8395 if (GET_CODE (operands[1]) == MEM)
8401 if (GET_CODE (operands[2]) == MEM)
8407 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8410 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8411 derived assemblers, confusingly reverse the direction of
8412 the operation for fsub{r} and fdiv{r} when the
8413 destination register is not st(0). The Intel assembler
8414 doesn't have this brain damage. Read !SYSV386_COMPAT to
8415 figure out what the hardware really does. */
8416 if (STACK_TOP_P (operands[0]))
8417 p = "{p\t%0, %2|rp\t%2, %0}";
8419 p = "{rp\t%2, %0|p\t%0, %2}";
8421 if (STACK_TOP_P (operands[0]))
8422 /* As above for fmul/fadd, we can't store to st(0). */
8423 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8425 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8430 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8433 if (STACK_TOP_P (operands[0]))
8434 p = "{rp\t%0, %1|p\t%1, %0}";
8436 p = "{p\t%1, %0|rp\t%0, %1}";
8438 if (STACK_TOP_P (operands[0]))
8439 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8441 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8446 if (STACK_TOP_P (operands[0]))
8448 if (STACK_TOP_P (operands[1]))
8449 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8451 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8454 else if (STACK_TOP_P (operands[1]))
8457 p = "{\t%1, %0|r\t%0, %1}";
8459 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8465 p = "{r\t%2, %0|\t%0, %2}";
8467 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8480 /* Return the mode needed for an entity in the optimize_mode_switching pass. */
8483 ix86_mode_needed (int entity, rtx insn)
8485 enum attr_i387_cw mode;
8487 /* The mode UNINITIALIZED is used to store the control word after a
8488 function call or ASM pattern. The mode ANY specifies that the function
8489 has no requirements on the control word and makes no changes to the
8490 bits we are interested in. */
8493 || (NONJUMP_INSN_P (insn)
8494 && (asm_noperands (PATTERN (insn)) >= 0
8495 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8496 return I387_CW_UNINITIALIZED;
8498 if (recog_memoized (insn) < 0)
8501 mode = get_attr_i387_cw (insn);
8506 if (mode == I387_CW_TRUNC)
8511 if (mode == I387_CW_FLOOR)
8516 if (mode == I387_CW_CEIL)
8521 if (mode == I387_CW_MASK_PM)
8532 /* Output code to initialize control word copies used by trunc?f?i and
8533 rounding patterns. CURRENT_MODE is set to the current control word,
8534 while NEW_MODE is set to the new control word. */
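/* For reference: in the x87 control word, bits 10-11 form the rounding
   control field (00 = to nearest, 01 = down, 10 = up, 11 = toward zero),
   which is why the code below ORs in 0x0400, 0x0800 or 0x0c00, and
   bit 5 (0x0020) masks the precision exception for nearbyint().  */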
8537 emit_i387_cw_initialization (int mode)
8539 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8544 rtx reg = gen_reg_rtx (HImode);
8546 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8547 emit_move_insn (reg, stored_mode);
8549 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8554 /* round toward zero (truncate) */
8555 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8556 slot = SLOT_CW_TRUNC;
8560 /* round down toward -oo */
8561 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8562 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8563 slot = SLOT_CW_FLOOR;
8567 /* round up toward +oo */
8568 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8569 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8570 slot = SLOT_CW_CEIL;
8573 case I387_CW_MASK_PM:
8574 /* mask precision exception for nearbyint() */
8575 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8576 slot = SLOT_CW_MASK_PM;
8588 /* round toward zero (truncate) */
8589 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8590 slot = SLOT_CW_TRUNC;
8594 /* round down toward -oo */
8595 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8596 slot = SLOT_CW_FLOOR;
8600 /* round up toward +oo */
8601 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8602 slot = SLOT_CW_CEIL;
8605 case I387_CW_MASK_PM:
8606 /* mask precision exception for nearbyint() */
8607 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8608 slot = SLOT_CW_MASK_PM;
8616 gcc_assert (slot < MAX_386_STACK_LOCALS);
8618 new_mode = assign_386_stack_local (HImode, slot);
8619 emit_move_insn (new_mode, reg);
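/* Control word cheat-sheet for the constants above (an illustrative
   note): bits 10-11 of the 16-bit x87 control word select the rounding
   mode -- 00 = to nearest, 01 = down (0x0400), 10 = up (0x0800),
   11 = toward zero (0x0c00) -- and bit 5 (0x0020) masks the precision
   exception, which is what the nearbyint() expansion relies on.  */
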
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (GET_CODE (operands[0]) == MEM);

  if (fisttp)
    output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
        output_asm_insn ("fistp%z0\t%0", operands);
      else
        output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}

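/* An illustrative expansion (a sketch, not verbatim compiler output):
   truncating to SImode with a non-default rounding mode emits

       fldcw   %3              # load the round-to-zero control word
       fistpl  (%eax)          # convert, store and pop
       fldcw   %2              # restore the saved control word

   while the fisttp path needs no control-word juggling, because the
   SSE3 fisttp instruction always truncates.  */
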
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#if HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    switch (REGNO (operands[opno]))
      {
      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
      }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}

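/* Encoding note (illustrative): ffreep %st(i) is the two-byte opcode
   0xdf 0xc0+i, so on this little-endian target ".word 0xc0df" emits the
   byte sequence df c0, i.e. "ffreep %st(0)".  This is how the insn is
   produced when the assembler predates the mnemonic.  */
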
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
        if (unordered_p)
          return "ucomiss\t{%1, %0|%0, %1}";
        else
          return "comiss\t{%1, %0|%0, %1}";
      else
        if (unordered_p)
          return "ucomisd\t{%1, %0|%0, %1}";
        else
          return "comisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
        {
          output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
          return output_387_ffreep (operands, 1);
        }
      else
        return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */

      if (eflags_p)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return output_387_ffreep (operands, 0);
        }
      else
        {
          if (unordered_p)
            return "fucompp\n\tfnstsw\t%0";
          else
            return "fcompp\n\tfnstsw\t%0";
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
      static const char * const alt[16] =
      {
        "fcom%z2\t%y2\n\tfnstsw\t%0",
        "fcomp%z2\t%y2\n\tfnstsw\t%0",
        "fucom%z2\t%y2\n\tfnstsw\t%0",
        "fucomp%z2\t%y2\n\tfnstsw\t%0",

        "ficom%z2\t%y2\n\tfnstsw\t%0",
        "ficomp%z2\t%y2\n\tfnstsw\t%0",
        NULL, NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL, NULL, NULL, NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}

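/* A worked instance of the ALT index above (an illustrative note): for
   eflags_p = 1, a floating-point second operand (intmode bit clear),
   unordered_p = 1 and a dying stack top, the mask is
   1000b | 0010b | 0001b = 11, selecting "fucomip\t{%y1, %0|%0, %y1}",
   i.e. the popping unordered compare-into-EFLAGS form.  */
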
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
             ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf (file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
                 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

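/* For reference (an illustrative sketch, not compiler output), the two
   expansions differ in size and in their effect on the flags:

       xorl  %eax, %eax        # 2 bytes, clobbers EFLAGS
       movl  $0, %eax          # 5 bytes, leaves EFLAGS intact

   which is why the xor form carries an explicit CC clobber and is
   avoided on targets where TARGET_USE_MOV0 is set.  */
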
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
        {
          op1 = legitimize_tls_address (op1, model, true);
          op1 = force_operand (op1, op0);
          if (op1 == op0)
            return;
        }
    }
  else if (GET_CODE (op1) == CONST
           && GET_CODE (XEXP (op1, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
      if (model)
        {
          rtx addend = XEXP (XEXP (op1, 0), 1);
          op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
          op1 = force_operand (op1, NULL);
          op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
                                     op0, 1, OPTAB_DIRECT);
          if (op1 == op0)
            return;
        }
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
        {
#if TARGET_MACHO
          if (MACHOPIC_PURE)
            {
              rtx temp = ((reload_in_progress
                           || ((op0 && GET_CODE (op0) == REG)
                               && mode == Pmode))
                          ? op0 : gen_reg_rtx (Pmode));
              op1 = machopic_indirect_data_reference (op1, temp);
              op1 = machopic_legitimize_pic_address (op1, mode,
                                                     temp == op1 ? 0 : temp);
            }
          else if (MACHOPIC_INDIRECT)
            op1 = machopic_indirect_data_reference (op1, 0);
          if (op0 == op1)
            return;
#endif
        }
      else
        {
          if (GET_CODE (op0) == MEM)
            op1 = force_reg (Pmode, op1);
          else
            op1 = legitimize_address (op1, op1, Pmode);
        }
    }
  else
    {
      if (GET_CODE (op0) == MEM
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && GET_CODE (op1) == MEM)
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
         to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
          && optimize && !reload_completed && !reload_in_progress)
        op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          if (strict)
            ;
          else if (GET_CODE (op1) == CONST_DOUBLE)
            {
              op1 = validize_mem (force_const_mem (mode, op1));
              if (!register_operand (op0, mode))
                {
                  rtx temp = gen_reg_rtx (mode);
                  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
                  emit_move_insn (op0, temp);
                  return;
                }
            }
        }
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && CONSTANT_P (op1)
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? If we have typed data, then it would appear that using
         movdqu is the only way to get unaligned data loaded with
         integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          rtx zero;

          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
          if (TARGET_SSE_SPLIT_REGS)
            {
              emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
              zero = op0;
            }
          else
            {
              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);
            }

          m = adjust_address (op1, DFmode, 0);
          emit_insn (gen_sse2_loadlpd (op0, zero, m));
          m = adjust_address (op1, DFmode, 8);
          emit_insn (gen_sse2_loadhpd (op0, op0, m));
        }
      else
        {
          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (op0, CONST0_RTX (mode));
          else
            emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

          if (mode != V4SFmode)
            op0 = gen_lowpart (V4SFmode, op0);
          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (op0, op0, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (op0, op0, m));
        }
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? Similar to above, only less clear because of quote
         typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
          && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          m = adjust_address (op0, DFmode, 0);
          emit_insn (gen_sse2_storelpd (m, op1));
          m = adjust_address (op0, DFmode, 8);
          emit_insn (gen_sse2_storehpd (m, op1));
        }
      else
        {
          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);
          m = adjust_address (op0, V2SFmode, 0);
          emit_insn (gen_sse_storelps (m, op1));
          m = adjust_address (op0, V2SFmode, 8);
          emit_insn (gen_sse_storehps (m, op1));
        }
    }
  else
    gcc_unreachable ();
}

/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
      else
        dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutative, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  operands[1] = src1;
  operands[2] = src2;

  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code,
                         enum machine_mode mode ATTRIBUTE_UNUSED,
                         rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutative, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
                && rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutative and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;

  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
   true, then replicate the mask for all elements of the vector register.
   If INVERT is true, then create a mask excluding the sign bit.  */

static rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtvec v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  if (mode == SFmode)
    lo = 0x80000000, hi = lo < 0;
  else if (HOST_BITS_PER_WIDE_INT >= 64)
    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
  else
    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
  mask = gen_lowpart (mode, mask);

  if (mode == SFmode)
    {
      if (vect)
        v = gen_rtvec (4, mask, mask, mask, mask);
      else
        v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      vec_mode = V4SFmode;
    }
  else
    {
      if (vect)
        v = gen_rtvec (2, mask, mask);
      else
        v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
      vec_mode = V2DFmode;
    }

  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
}

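/* Concretely (an illustrative note): for SFmode the generated mask is
   0x80000000 in each selected lane and for DFmode it is
   0x8000000000000000, so NEG can be implemented as a vector XOR with
   the mask and ABS as an AND with the inverted mask.  */
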
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands or we're using the x87, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (use_sse && rtx_equal_p (dst, src))
        matching_memory = true;
      else
        dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
        {
          use = gen_rtx_USE (VOIDmode, mask);
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          emit_insn (gen_rtx_PARALLEL (VOIDmode,
                                       gen_rtvec (3, set, use, clob)));
        }
      else
        emit_insn (set);
    }

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtvec v;

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
        op0 = CONST0_RTX (vmode);
      else
        {
          if (mode == SFmode)
            v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
          else
            v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
        }

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
      else
        emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
    }
  else
    {
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
      else
        emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
    }
}

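/* The underlying identity (an illustrative note): with S the sign-bit
   mask built above,

       copysign (x, y) = (x & ~S) | (y & S)

   i.e. the magnitude bits come from X and the sign bit from Y.  The
   _const and _var splitters below differ only in how many of the AND
   operands are already known at expand time.  */
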
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))    /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))              /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                             /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))                 /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                             /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
         for a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGOCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode: case CCGCmode: case CCGOCmode: case CCNOmode: case CCZmode:
      switch (m2)
        {
        default:
          return VOIDmode;

        case CCmode: case CCGCmode: case CCGOCmode: case CCNOmode:
        case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code)
           == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT: return GTU;
    case GE: return GEU;
    case ORDERED: case UNORDERED: return code;
    case UNEQ: return EQ;
    case UNLT: return LTU;
    case UNLE: return LEU;
    case LTGT: return NE;
    default: return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to UNKNOWN.
   We never require more than two branches.  */

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
                          enum rtx_code *first_code,
                          enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
    case GE:                    /* GEU - CF=0 */
    case ORDERED:               /* PF=0 */
    case UNORDERED:             /* PF=1 */
    case UNEQ:                  /* EQ - ZF=1 */
    case UNLT:                  /* LTU - CF=1 */
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
    case LTGT:                  /* EQ - ZF=0 */
      break;
    case LT:                    /* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:                    /* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:                    /* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:                  /* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      gcc_unreachable ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}

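/* For example (an illustrative sketch, not verbatim compiler output):
   with -mieee-fp a trap-free "a < b" is tested as UNLT with an
   UNORDERED bypass, roughly

       fucomip %st(1), %st
       jp      1f              # bypass: unordered operands, result false
       jb      .Ltrue          # CF=1: a < b
     1:

   whereas without TARGET_IEEE_FP the bypass jump is dropped.  */
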
/* Return cost of comparison done fcom + arithmetics operations on AX.
   All following functions do use number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE: case UNLT: case LTGT: case GT: case GE:
    case UNORDERED: case ORDERED: case UNEQ:
      return 4;
    case LT: case NE: case EQ: case UNGE:
      return 5;
    case LE: case UNGT:
      return 6;
    default:
      gcc_unreachable ();
    }
}

/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;

  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
}

/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;

  /* Return arbitrarily high cost when instruction is not preferred - this
     keeps gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
}

/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
                        rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != UNKNOWN)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != UNKNOWN)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}

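/* Decoder for the magic constants above (an illustrative note): after
   fnstsw, AH holds the x87 condition bits C0 = 0x01, C2 = 0x04 and
   C3 = 0x40, so 0x45 masks all three at once.  E.g. the ordered "a > b"
   tests (AH & 0x45) == 0, which only holds when C0 = C2 = C3 = 0, the
   ordered greater-than result.  */
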
rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;

  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */

bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;

  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non NULL.  */
  if (ix86_compare_emitted)
    goto simple;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code,
                                  &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand jump early.  Otherwise delay expansion by
           creating compound insn to not confuse optimizers.  */
        if (bypass_code == UNKNOWN && second_code == UNKNOWN
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }
    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        if (GET_MODE (ix86_compare_op0) == DImode)
          {
            split_di (&ix86_compare_op0, 1, lo+0, hi+0);
            split_di (&ix86_compare_op1, 1, lo+1, hi+1);
            submode = SImode;
          }
        else
          {
            split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
            split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
            submode = DImode;
          }

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_unreachable ();
    }
}

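/* A worked instance of the equality shortcut above (illustrative):
   for "if (a == b)" with DImode A and B, the expansion computes

       t1 = hi(a) ^ hi(b)
       t2 = lo(a) ^ lo(b)
       t  = t1 | t2

   and branches on t == 0, replacing two compare+branch pairs with a
   single branch at the price of one extra OR instruction.  */
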
/* Split branch based on floating point condition.  */

void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume the BYPASS and SECOND to be always test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
         to be updated.  Later we may run some experiments and see
         if unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
    return 0; /* FAIL */

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
        {
          gcc_assert (!second_test);
          test = bypass_test;
          bypass = 1;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
        }
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  if (ix86_compare_op0 && ix86_compare_op1)
    {
      equiv = simplify_gen_relational (code, QImode,
                                       GET_MODE (ix86_compare_op0),
                                       ix86_compare_op0, ix86_compare_op1);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
    }

  return 1; /* DONE */
}

/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */

static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through special path.  Also we
     can't deal with FP compares yet.  This is possible to add.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate into carry
         flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with a carry
         flag based comparison.  This fails to be true only when we decide
         to expand the comparison using arithmetic, which is not a common
         scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                           &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
        return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
        return false;
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
        return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}

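/* Why the carry-flag form pays off (an illustrative sketch, not
   verbatim compiler output): once a comparison is massaged into
   LTU/GEU, the result can be materialized without a branch, e.g.

       cmpl  %esi, %edi
       sbbl  %eax, %eax        # %eax = (edi < esi) ? -1 : 0

   which is the building block the conditional-move expansion below
   uses for pairs of constants.  */
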
int
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
         sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                             ix86_compare_op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
                  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, ix86_compare_op0)
                  || reg_overlap_mentioned_p (out, ix86_compare_op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */
              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp,
                                            copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return 1; /* DONE */
        }
      if (diff < 0)
        {
          HOST_WIDE_INT tmp;
          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;
          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            {
              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
          && GET_CODE (ix86_compare_op1) == CONST_INT)
        {
          if (ix86_compare_op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (ix86_compare_op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (ix86_compare_op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1      (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return 1; /* DONE */
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
                                        copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }
      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */
11111 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11112 && BRANCH_COST >= 2)
11118 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    /* We may be reversing an unordered compare to a normal compare,
	       which is not valid in general (we may convert a non-trapping
	       condition into a trapping one), but on i386 we currently
	       emit all comparisons unordered.  */
11123 code = reverse_condition_maybe_unordered (code);
11126 code = reverse_condition (code);
11127 if (compare_code != UNKNOWN)
11128 compare_code = reverse_condition (compare_code);
11132 if (compare_code != UNKNOWN)
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while the code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */
11146 if (compare_code == GE || !cf)
11148 code = reverse_condition (code);
11153 HOST_WIDE_INT tmp = cf;
11158 out = emit_store_flag (out, code, ix86_compare_op0,
11159 ix86_compare_op1, VOIDmode, 0, -1);
11163 out = emit_store_flag (out, code, ix86_compare_op0,
11164 ix86_compare_op1, VOIDmode, 0, 1);
11166 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11167 copy_rtx (out), 1, OPTAB_DIRECT);
11170 out = expand_simple_binop (mode, AND, copy_rtx (out),
11171 gen_int_mode (cf - ct, mode),
11172 copy_rtx (out), 1, OPTAB_DIRECT);
11174 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11175 copy_rtx (out), 1, OPTAB_DIRECT);
11176 if (!rtx_equal_p (out, operands[0]))
11177 emit_move_insn (operands[0], copy_rtx (out));
11179 return 1; /* DONE */
11183 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11185 /* Try a few things more with specific constants and a variable. */
11188 rtx var, orig_out, out, tmp;
11190 if (BRANCH_COST <= 2)
11191 return 0; /* FAIL */
11193 /* If one of the two operands is an interesting constant, load a
11194 constant with the above and mask it in with a logical operation. */
11196 if (GET_CODE (operands[2]) == CONST_INT)
11199 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11200 operands[3] = constm1_rtx, op = and_optab;
11201 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11202 operands[3] = const0_rtx, op = ior_optab;
11204 return 0; /* FAIL */
11206 else if (GET_CODE (operands[3]) == CONST_INT)
11209 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11210 operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11212 operands[2] = const0_rtx, op = ior_optab;
11214 return 0; /* FAIL */
11217 return 0; /* FAIL */
11219 orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;
11223 /* Recurse to get the constant loaded. */
11224 if (ix86_expand_int_movcc (operands) == 0)
11225 return 0; /* FAIL */
11227 /* Mask in the interesting variable. */
      out = expand_binop (mode, op, var, tmp, orig_out, 0, OPTAB_WIDEN);
11230 if (!rtx_equal_p (out, orig_out))
11231 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11233 return 1; /* DONE */
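      /* Illustrative sketch of the masking trick above: to compute
	 dest = cond ? 0 : var, the recursive call first materializes
	 dest = cond ? 0 : -1 (a constant-only movcc), and then

	   (cond ? 0 : -1) & var == cond ? 0 : var

	 masks the variable in; the ior_optab case handles the dual,
	 dest = cond ? -1 : var.  */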
  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */
11247 if (! nonimmediate_operand (operands[2], mode))
11248 operands[2] = force_reg (mode, operands[2]);
11249 if (! nonimmediate_operand (operands[3], mode))
11250 operands[3] = force_reg (mode, operands[3]);
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);
  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  copy_rtx (operands[3]),
						  copy_rtx (operands[0]))));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  copy_rtx (operands[2]),
						  copy_rtx (operands[0]))));
11292 return 1; /* DONE */
11295 /* Swap, force into registers, or otherwise massage the two operands
11296 to an sse comparison with a mask result. Thus we differ a bit from
11297 ix86_prepare_fp_compare_args which expects to produce a flags result.
11299 The DEST operand exists to help determine whether to commute commutative
11300 operators. The POP0/POP1 operands are updated in place. The new
11301 comparison code is returned, or UNKNOWN if not implementable. */
11303 static enum rtx_code
11304 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11305 rtx *pop0, rtx *pop1)
11313 /* We have no LTGT as an operator. We could implement it with
11314 NE & ORDERED, but this requires an extra temporary. It's
11315 not clear that it's worth it. */
11322 /* These are supported directly. */
11329 /* For commutative operators, try to canonicalize the destination
11330 operand to be first in the comparison - this helps reload to
11331 avoid extra moves. */
11332 if (!dest || !rtx_equal_p (dest, *pop1))
11340 /* These are not supported directly. Swap the comparison operands
11341 to transform into something that is supported. */
11345 code = swap_condition (code);
11349 gcc_unreachable ();
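/* Example of the swap above (illustrative): GT/GE have no direct SSE
   compare encoding, so "a > b" is rewritten as "b < a",

     cmpltss %xmm0, %xmm1	; xmm1 = (b < a) ? all-ones : 0

   with *pop0 and *pop1 exchanged so the LT/LE forms can be used.  */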
11355 /* Detect conditional moves that exactly match min/max operational
11356 semantics. Note that this is IEEE safe, as long as we don't
11357 interchange the operands.
11359 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11360 and TRUE if the operation is successful and instructions are emitted. */
11363 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11364 rtx cmp_op1, rtx if_true, rtx if_false)
11366 enum machine_mode mode;
11372 else if (code == UNGE)
11375 if_true = if_false;
11381 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11383 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11388 mode = GET_MODE (dest);
11390 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11391 but MODE may be a vector mode and thus not appropriate. */
11392 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11394 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11397 if_true = force_reg (mode, if_true);
11398 v = gen_rtvec (2, if_true, if_false);
11399 tmp = gen_rtx_UNSPEC (mode, v, u);
11403 code = is_min ? SMIN : SMAX;
11404 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11407 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
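/* Illustrative note on why operand order matters above: minss/minps
   return their second operand when the two inputs are unordered (NaN)
   or both zero, so "a < b ? a : b" maps onto MIN (a, b) exactly, while
   the interchanged form would silently change NaN and -0.0/+0.0
   behaviour; hence the UNSPEC fallback when the flags do not promise
   finite math.  */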
11411 /* Expand an sse vector comparison. Return the register with the result. */
11414 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11415 rtx op_true, rtx op_false)
11417 enum machine_mode mode = GET_MODE (dest);
11420 cmp_op0 = force_reg (mode, cmp_op0);
11421 if (!nonimmediate_operand (cmp_op1, mode))
11422 cmp_op1 = force_reg (mode, cmp_op1);
11425 || reg_overlap_mentioned_p (dest, op_true)
11426 || reg_overlap_mentioned_p (dest, op_false))
11427 dest = gen_reg_rtx (mode);
11429 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11430 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11435 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11436 operations. This is used for both scalar and vector conditional moves. */
11439 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11441 enum machine_mode mode = GET_MODE (dest);
11444 if (op_false == CONST0_RTX (mode))
11446 op_true = force_reg (mode, op_true);
11447 x = gen_rtx_AND (mode, cmp, op_true);
11448 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11450 else if (op_true == CONST0_RTX (mode))
11452 op_false = force_reg (mode, op_false);
11453 x = gen_rtx_NOT (mode, cmp);
11454 x = gen_rtx_AND (mode, x, op_false);
11455 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11459 op_true = force_reg (mode, op_true);
11460 op_false = force_reg (mode, op_false);
11462 t2 = gen_reg_rtx (mode);
11464 t3 = gen_reg_rtx (mode);
11468 x = gen_rtx_AND (mode, op_true, cmp);
11469 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11471 x = gen_rtx_NOT (mode, cmp);
11472 x = gen_rtx_AND (mode, x, op_false);
11473 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11475 x = gen_rtx_IOR (mode, t3, t2);
11476 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
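/* Per element, the general case above computes (illustrative C):

     dest = (cmp & op_true) | (~cmp & op_false);

   selecting op_true where the comparison mask is all-ones and op_false
   where it is all-zeros; the two special cases earlier simply drop the
   half that is known to be zero.  */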
11480 /* Expand a floating-point conditional move. Return true if successful. */
11483 ix86_expand_fp_movcc (rtx operands[])
11485 enum machine_mode mode = GET_MODE (operands[0]);
11486 enum rtx_code code = GET_CODE (operands[1]);
11487 rtx tmp, compare_op, second_test, bypass_test;
11489 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11491 enum machine_mode cmode;
11493 /* Since we've no cmove for sse registers, don't force bad register
11494 allocation just to gain access to it. Deny movcc when the
11495 comparison mode doesn't match the move mode. */
11496 cmode = GET_MODE (ix86_compare_op0);
11497 if (cmode == VOIDmode)
11498 cmode = GET_MODE (ix86_compare_op1);
11502 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11504 &ix86_compare_op1);
11505 if (code == UNKNOWN)
11508 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11509 ix86_compare_op1, operands[2],
11513 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11514 ix86_compare_op1, operands[2], operands[3]);
11515 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11519 /* The floating point conditional move instructions don't directly
11520 support conditions resulting from a signed integer comparison. */
11522 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11527 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11529 gcc_assert (!second_test && !bypass_test);
11530 tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
11535 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
						  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, second_test,
						  operands[2], operands[0])));
11565 /* Expand a floating-point vector conditional move; a vcond operation
11566 rather than a movcc operation. */
11569 ix86_expand_fp_vcond (rtx operands[])
11571 enum rtx_code code = GET_CODE (operands[3]);
11574 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11575 &operands[4], &operands[5]);
11576 if (code == UNKNOWN)
11579 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11580 operands[5], operands[1], operands[2]))
11583 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11584 operands[1], operands[2]);
11585 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11589 /* Expand a signed integral vector conditional move. */
11592 ix86_expand_int_vcond (rtx operands[])
11594 enum machine_mode mode = GET_MODE (operands[0]);
11595 enum rtx_code code = GET_CODE (operands[3]);
11596 bool negate = false;
11599 cop0 = operands[4];
11600 cop1 = operands[5];
11602 /* Canonicalize the comparison to EQ, GT, GTU. */
11613 code = reverse_condition (code);
11619 code = reverse_condition (code);
11625 code = swap_condition (code);
11626 x = cop0, cop0 = cop1, cop1 = x;
11630 gcc_unreachable ();
11633 /* Unsigned parallel compare is not supported by the hardware. Play some
11634 tricks to turn this into a signed comparison against 0. */
11637 cop0 = force_reg (mode, cop0);
11645 /* Perform a parallel modulo subtraction. */
11646 t1 = gen_reg_rtx (mode);
11647 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11649 /* Extract the original sign bit of op0. */
11650 mask = GEN_INT (-0x80000000);
11651 mask = gen_rtx_CONST_VECTOR (mode,
11652 gen_rtvec (4, mask, mask, mask, mask));
11653 mask = force_reg (mode, mask);
11654 t2 = gen_reg_rtx (mode);
11655 emit_insn (gen_andv4si3 (t2, cop0, mask));
11657 /* XOR it back into the result of the subtraction. This results
11658 in the sign bit set iff we saw unsigned underflow. */
11659 x = gen_reg_rtx (mode);
11660 emit_insn (gen_xorv4si3 (x, t1, t2));
11668 /* Perform a parallel unsigned saturating subtraction. */
11669 x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, cop0, cop1)));
	  code = EQ;
	  negate = !negate;
11678 gcc_unreachable ();
      cop0 = x;
      cop1 = CONST0_RTX (mode);
11685 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11686 operands[1+negate], operands[2-negate]);
11688 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11689 operands[2-negate]);
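/* Worked example for the QImode/HImode path above (illustrative):
   the unsigned saturating subtraction computes MAX (a - b, 0), which
   is zero exactly when a <=u b, e.g.

     a = 200, b = 10   ->  190 (nonzero, so a >u b)
     a = 10,  b = 200  ->  0   (saturated, so a <=u b)

   comparing that difference against zero for equality therefore yields
   the a <=u b mask, and NEGATE swaps the two move operands to turn it
   into the desired GTU selection.  */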
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
11697 ix86_expand_int_addcc (rtx operands[])
11699 enum rtx_code code = GET_CODE (operands[1]);
11701 rtx val = const0_rtx;
11702 bool fpcmp = false;
11703 enum machine_mode mode = GET_MODE (operands[0]);
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;

  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
    return 0;

  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }
11724 PUT_CODE (compare_op,
11725 reverse_condition_maybe_unordered
11726 (GET_CODE (compare_op)));
11728 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11730 PUT_MODE (compare_op, mode);
11732 /* Construct either adc or sbb insn. */
11733 if ((code == LTU) == (operands[3] == constm1_rtx))
11735 switch (GET_MODE (operands[0]))
11738 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11741 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11744 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11747 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11750 gcc_unreachable ();
11755 switch (GET_MODE (operands[0]))
11758 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11761 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11764 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11767 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11770 gcc_unreachable ();
11773 return 1; /* DONE */
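/* Illustrative use of the carry flag above: a conditional increment
   such as dest = (a <u b) ? x + 1 : x comes out as roughly

     cmpl %ebx, %eax	; CF = (a <u b)
     adcl $0, %edx	; dest = x + CF

   and the sbb variants handle the matching conditional decrement.  */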
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */
11783 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
  if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;
11792 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11793 gcc_assert (size >= 2 && size <= 3);
11795 /* Optimize constant pool reference to immediates. This is used by fp
11796 moves, that force all constants to memory to allow combining. */
11797 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11799 rtx tmp = maybe_get_pool_constant (operand);
11804 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
      /* The only non-offsettable memories we handle are pushes.  */
11807 int ok = push_operand (operand, VOIDmode);
11811 operand = copy_rtx (operand);
11812 PUT_MODE (operand, Pmode);
11813 parts[0] = parts[1] = parts[2] = operand;
11817 if (GET_CODE (operand) == CONST_VECTOR)
11819 enum machine_mode imode = int_mode_for_mode (mode);
11820 /* Caution: if we looked through a constant pool memory above,
11821 the operand may actually have a different mode now. That's
11822 ok, since we want to pun this all the way back to an integer. */
11823 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11824 gcc_assert (operand != NULL);
11830 if (mode == DImode)
11831 split_di (&operand, 1, &parts[0], &parts[1]);
11834 if (REG_P (operand))
11836 gcc_assert (reload_completed);
11837 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11838 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11840 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11842 else if (offsettable_memref_p (operand))
11844 operand = adjust_address (operand, SImode, 0);
11845 parts[0] = operand;
11846 parts[1] = adjust_address (operand, SImode, 4);
11848 parts[2] = adjust_address (operand, SImode, 8);
11850 else if (GET_CODE (operand) == CONST_DOUBLE)
11855 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11859 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11860 parts[2] = gen_int_mode (l[2], SImode);
11863 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11866 gcc_unreachable ();
11868 parts[1] = gen_int_mode (l[1], SImode);
11869 parts[0] = gen_int_mode (l[0], SImode);
11872 gcc_unreachable ();
11877 if (mode == TImode)
11878 split_ti (&operand, 1, &parts[0], &parts[1]);
11879 if (mode == XFmode || mode == TFmode)
11881 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11882 if (REG_P (operand))
11884 gcc_assert (reload_completed);
11885 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11886 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11888 else if (offsettable_memref_p (operand))
11890 operand = adjust_address (operand, DImode, 0);
11891 parts[0] = operand;
11892 parts[1] = adjust_address (operand, upper_mode, 8);
11894 else if (GET_CODE (operand) == CONST_DOUBLE)
11899 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11900 real_to_target (l, &r, mode);
11902 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11903 if (HOST_BITS_PER_WIDE_INT >= 64)
11906 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11907 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11910 parts[0] = immed_double_const (l[0], l[1], DImode);
11912 if (upper_mode == SImode)
11913 parts[1] = gen_int_mode (l[2], SImode);
11914 else if (HOST_BITS_PER_WIDE_INT >= 64)
11917 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11918 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11921 parts[1] = immed_double_const (l[2], l[3], DImode);
11924 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
11937 ix86_split_long_move (rtx operands[])
11942 int collisions = 0;
11943 enum machine_mode mode = GET_MODE (operands[0]);
  /* The DFmode expanders may ask us to move a double.  For a 64-bit target
     this is a single move.  By hiding that fact here we simplify the
     i386.md splitters.  */
11948 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11950 /* Optimize constant pool reference to immediates. This is used by
11951 fp moves, that force all constants to memory to allow combining. */
11953 if (GET_CODE (operands[1]) == MEM
11954 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11955 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11956 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11957 if (push_operand (operands[0], VOIDmode))
11959 operands[0] = copy_rtx (operands[0]);
11960 PUT_MODE (operands[0], Pmode);
11963 operands[0] = gen_lowpart (DImode, operands[0]);
11964 operands[1] = gen_lowpart (DImode, operands[1]);
11965 emit_move_insn (operands[0], operands[1]);
11969 /* The only non-offsettable memory we handle is push. */
11970 if (push_operand (operands[0], VOIDmode))
11973 gcc_assert (GET_CODE (operands[0]) != MEM
11974 || offsettable_memref_p (operands[0]));
11976 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11977 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11979 /* When emitting push, take care for source operands on the stack. */
11980 if (push && GET_CODE (operands[1]) == MEM
11981 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11984 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11985 XEXP (part[1][2], 0));
11986 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11987 XEXP (part[1][1], 0));
  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;
12002 /* Collision in the middle part can be handled by reordering. */
12003 if (collisions == 1 && nparts == 3
12004 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12007 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12008 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12011 /* If there are more collisions, we can't handle it by reordering.
12012 Do an lea to the last part and use only one colliding move. */
12013 else if (collisions > 1)
12019 base = part[0][nparts - 1];
12021 /* Handle the case when the last part isn't valid for lea.
12022 Happens in 64-bit mode storing the 12-byte XFmode. */
12023 if (GET_MODE (base) != Pmode)
12024 base = gen_rtx_REG (Pmode, REGNO (base));
12026 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12027 part[1][0] = replace_equiv_address (part[1][0], base);
12028 part[1][1] = replace_equiv_address (part[1][1],
12029 plus_constant (base, UNITS_PER_WORD));
      if (nparts == 3)
	part[1][2] = replace_equiv_address (part[1][2],
					    plus_constant (base, 8));
12042 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12043 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12044 emit_move_insn (part[0][2], part[1][2]);
      /* In 64-bit mode we don't have a 32-bit push available.  If this is a
	 register, that is OK: we just use the larger counterpart.  We also
	 retype memory references; these come from an attempt to avoid a REX
	 prefix when moving the second half of a TFmode value.  */
12053 if (GET_MODE (part[1][1]) == SImode)
	  switch (GET_CODE (part[1][1]))
	    {
	    case MEM:
	      part[1][1] = adjust_address (part[1][1], DImode, 0);
	      break;

	    case REG:
	      part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      break;

	    default:
	      gcc_unreachable ();
	    }
12069 if (GET_MODE (part[1][0]) == SImode)
12070 part[1][0] = part[1][1];
12073 emit_move_insn (part[0][1], part[1][1]);
12074 emit_move_insn (part[0][0], part[1][0]);
12078 /* Choose correct order to not overwrite the source before it is copied. */
12079 if ((REG_P (part[0][0])
12080 && REG_P (part[1][1])
12081 && (REGNO (part[0][0]) == REGNO (part[1][1])
12083 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12085 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12089 operands[2] = part[0][2];
12090 operands[3] = part[0][1];
12091 operands[4] = part[0][0];
12092 operands[5] = part[1][2];
12093 operands[6] = part[1][1];
12094 operands[7] = part[1][0];
12098 operands[2] = part[0][1];
12099 operands[3] = part[0][0];
12100 operands[5] = part[1][1];
12101 operands[6] = part[1][0];
12108 operands[2] = part[0][0];
12109 operands[3] = part[0][1];
12110 operands[4] = part[0][2];
12111 operands[5] = part[1][0];
12112 operands[6] = part[1][1];
12113 operands[7] = part[1][2];
12117 operands[2] = part[0][0];
12118 operands[3] = part[0][1];
12119 operands[5] = part[1][0];
12120 operands[6] = part[1][1];
12124 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12127 if (GET_CODE (operands[5]) == CONST_INT
12128 && operands[5] != const0_rtx
12129 && REG_P (operands[2]))
12131 if (GET_CODE (operands[6]) == CONST_INT
12132 && INTVAL (operands[6]) == INTVAL (operands[5]))
12133 operands[6] = operands[2];
12136 && GET_CODE (operands[7]) == CONST_INT
12137 && INTVAL (operands[7]) == INTVAL (operands[5]))
12138 operands[7] = operands[2];
12142 && GET_CODE (operands[6]) == CONST_INT
12143 && operands[6] != const0_rtx
12144 && REG_P (operands[3])
12145 && GET_CODE (operands[7]) == CONST_INT
12146 && INTVAL (operands[7]) == INTVAL (operands[6]))
12147 operands[7] = operands[3];
12150 emit_move_insn (operands[2], operands[5]);
12151 emit_move_insn (operands[3], operands[6]);
12153 emit_move_insn (operands[4], operands[7]);
12158 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12159 left shift by a constant, either using a single shift or
12160 a sequence of add instructions. */
12163 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
    emit_insn ((mode == DImode
		? gen_addsi3
		: gen_adddi3) (operand, operand, operand));
12171 else if (!optimize_size
12172 && count * ix86_cost->add <= ix86_cost->shift_const)
      for (i = 0; i < count; i++)
	emit_insn ((mode == DImode
		    ? gen_addsi3
		    : gen_adddi3) (operand, operand, operand));
    emit_insn ((mode == DImode
		? gen_ashlsi3
		: gen_ashldi3) (operand, operand, GEN_INT (count)));
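/* Illustrative: when an add is cheap enough relative to a constant
   shift for the active cost table, "operand <<= 2" may be emitted as

     addl %eax, %eax
     addl %eax, %eax

   instead of "sall $2, %eax"; the count * add <= shift_const test
   above is what makes that choice.  */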
12189 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12191 rtx low[2], high[2];
12193 const int single_width = mode == DImode ? 32 : 64;
12195 if (GET_CODE (operands[2]) == CONST_INT)
12197 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12198 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12200 if (count >= single_width)
12202 emit_move_insn (high[0], low[1]);
12203 emit_move_insn (low[0], const0_rtx);
12205 if (count > single_width)
12206 ix86_expand_ashl_const (high[0], count - single_width, mode);
12210 if (!rtx_equal_p (operands[0], operands[1]))
12211 emit_move_insn (operands[0], operands[1]);
      emit_insn ((mode == DImode
		  ? gen_x86_shld_1
		  : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12215 ix86_expand_ashl_const (low[0], count, mode);
12220 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12222 if (operands[1] == const1_rtx)
	  /* Assuming we've chosen QImode-capable registers, then 1 << N
	     can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12226 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12228 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12230 ix86_expand_clear (low[0]);
12231 ix86_expand_clear (high[0]);
12232 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12234 d = gen_lowpart (QImode, low[0]);
12235 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12236 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12237 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12239 d = gen_lowpart (QImode, high[0]);
12240 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12241 s = gen_rtx_NE (QImode, flags, const0_rtx);
12242 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12245 /* Otherwise, we can get the same results by manually performing
12246 a bit extract operation on bit 5/6, and then performing the two
12247 shifts. The two methods of getting 0/1 into low/high are exactly
12248 the same size. Avoiding the shift in the bit extract case helps
12249 pentium4 a bit; no one else seems to care much either way. */
	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode,
				     operands[2]);
	  else
	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12258 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0],
				      GEN_INT (mode == DImode ? 5 : 6)));
	  emit_insn ((mode == DImode
		      ? gen_andsi3
		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12266 emit_move_insn (low[0], high[0]);
	  emit_insn ((mode == DImode
		      ? gen_xorsi3
		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (high[0], high[0], operands[2]));
12281 if (operands[1] == constm1_rtx)
12283 /* For -1 << N, we can avoid the shld instruction, because we
12284 know that we're shifting 0...31/63 ones into a -1. */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
12293 if (!rtx_equal_p (operands[0], operands[1]))
12294 emit_move_insn (operands[0], operands[1]);
12296 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shld_1
		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
12302 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12304 if (TARGET_CMOVE && scratch)
12306 ix86_expand_clear (scratch);
12307 emit_insn ((mode == DImode
12308 ? gen_x86_shift_adj_1
12309 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12312 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
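/* Illustrative shape of the variable-count DImode shift on a 32-bit
   target (a sketch, not emitted verbatim):

     shldl %cl, %eax, %edx	; high = high << cl | low >> (32 - cl)
     sall %cl, %eax		; low <<= cl
     ; if cl >= 32: high = low, low = 0

   the TARGET_CMOVE path performs that final fixup with cmov against
   the cleared scratch register, the fallback with a conditional
   jump.  */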
12316 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12318 rtx low[2], high[2];
12320 const int single_width = mode == DImode ? 32 : 64;
12322 if (GET_CODE (operands[2]) == CONST_INT)
12324 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12325 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12327 if (count == single_width * 2 - 1)
12329 emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
12334 emit_move_insn (low[0], high[0]);
12337 else if (count >= single_width)
12339 emit_move_insn (low[0], high[1]);
12340 emit_move_insn (high[0], low[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
12345 if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
12353 if (!rtx_equal_p (operands[0], operands[1]))
12354 emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12365 if (!rtx_equal_p (operands[0], operands[1]))
12366 emit_move_insn (operands[0], operands[1]);
12368 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));
12377 if (TARGET_CMOVE && scratch)
12379 emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
12390 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12395 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12397 rtx low[2], high[2];
12399 const int single_width = mode == DImode ? 32 : 64;
12401 if (GET_CODE (operands[2]) == CONST_INT)
12403 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12404 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12406 if (count >= single_width)
12408 emit_move_insn (low[0], high[1]);
12409 ix86_expand_clear (high[0]);
12411 if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
12419 if (!rtx_equal_p (operands[0], operands[1]))
12420 emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12431 if (!rtx_equal_p (operands[0], operands[1]))
12432 emit_move_insn (operands[0], operands[1]);
12434 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));
12443 /* Heh. By reversing the arguments, we can reuse this pattern. */
12444 if (TARGET_CMOVE && scratch)
12446 ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
		  ? gen_x86_shift_adj_1
		  : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					   scratch));
12453 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Helper function for the string operations below.  Tests whether VARIABLE
   has the bits selected by VALUE clear (i.e. is suitably aligned); if so,
   jumps to the returned label.  */
12460 ix86_expand_aligntest (rtx variable, int value)
12462 rtx label = gen_label_rtx ();
12463 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
/* Decrease COUNTREG by VALUE.  */
12475 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
/* Zero-extend the possibly-SImode EXP into a Pmode register.  */
12485 ix86_zero_extend_to_Pmode (rtx exp)
12488 if (GET_MODE (exp) == VOIDmode)
12489 return force_reg (Pmode, exp);
12490 if (GET_MODE (exp) == Pmode)
12491 return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
12497 /* Expand string move (memcpy) operation. Use i386 string operations when
12498 profitable. expand_clrmem contains similar code. */
12500 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12502 rtx srcreg, destreg, countreg, srcexp, destexp;
12503 enum machine_mode counter_mode;
12504 HOST_WIDE_INT align = 0;
12505 unsigned HOST_WIDE_INT count = 0;
12507 if (GET_CODE (align_exp) == CONST_INT)
12508 align = INTVAL (align_exp);
12510 /* Can't use any of this if the user has appropriated esi or edi. */
12511 if (global_regs[4] || global_regs[5])
12514 /* This simple hack avoids all inlining code and simplifies code below. */
12515 if (!TARGET_ALIGN_STRINGOPS)
12518 if (GET_CODE (count_exp) == CONST_INT)
12520 count = INTVAL (count_exp);
12521 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12525 /* Figure out proper mode for counter. For 32bits it is always SImode,
12526 for 64bits use SImode when possible, otherwise DImode.
12527 Set count to number of bytes copied when known at compile time. */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;
12535 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12537 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12538 if (destreg != XEXP (dst, 0))
12539 dst = replace_equiv_address_nv (dst, destreg);
12540 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12541 if (srcreg != XEXP (src, 0))
12542 src = replace_equiv_address_nv (src, srcreg);
12544 /* When optimizing for size emit simple rep ; movsb instruction for
12545 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12546 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     Size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12548 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12549 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12550 known to be zero or not. The rep; movsb sequence causes higher
12551 register pressure though, so take that into account. */
  if ((!optimize || optimize_size)
      && (count == 0
	  || ((count & 0x03)
	      && (!optimize_size
		  || (count & 3) + count / 4 > 6))))
12560 emit_insn (gen_cld ());
12561 countreg = ix86_zero_extend_to_Pmode (count_exp);
12562 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12563 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
			      destexp, srcexp));
12568 /* For constant aligned (or small unaligned) copies use rep movsl
12569 followed by code copying the rest. For PentiumPro ensure 8 byte
12570 alignment to allow rep movsl acceleration. */
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
12577 unsigned HOST_WIDE_INT offset = 0;
12578 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12579 rtx srcmem, dstmem;
12581 emit_insn (gen_cld ());
12582 if (count & ~(size - 1))
12584 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12586 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12588 while (offset < (count & ~(size - 1)))
	      srcmem = adjust_automodify_address_nv (src, movs_mode,
						     srcreg, offset);
	      dstmem = adjust_automodify_address_nv (dst, movs_mode,
						     destreg, offset);
	      emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	      offset += size;
12600 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12601 & (TARGET_64BIT ? -1 : 0x3fffffff));
12602 countreg = copy_to_mode_reg (counter_mode, countreg);
12603 countreg = ix86_zero_extend_to_Pmode (countreg);
12605 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12606 GEN_INT (size == 4 ? 2 : 3));
12607 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12608 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12610 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12611 countreg, destexp, srcexp));
12612 offset = count & ~(size - 1);
12615 if (size == 8 && (count & 0x04))
	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
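      /* Worked example (illustrative): a constant count of 23 with
	 size == 8 copies 16 bytes with movsq/rep movsq, after which the
	 tail tests above emit one movsl for bit 2, one movsw for bit 1
	 and one movsb for bit 0, covering the remaining 7 bytes.  */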
12642 /* The generic code based on the glibc implementation:
12643 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12644 allowing accelerated copying there)
12645 - copy the data using rep movsl
12646 - copy the rest. */
12651 rtx srcmem, dstmem;
12652 int desired_alignment = (TARGET_PENTIUMPRO
12653 && (count == 0 || count >= (unsigned int) 260)
12654 ? 8 : UNITS_PER_WORD);
12655 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12656 dst = change_address (dst, BLKmode, destreg);
12657 src = change_address (src, BLKmode, srcreg);
      /* In case we don't know anything about the alignment, default to
	 the library version, since it is usually equally fast and results in
	 shorter code.

	 Also emit the call when we know that the count is large and call
	 overhead will not be important.  */
12665 if (!TARGET_INLINE_ALL_STRINGOPS
12666 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12669 if (TARGET_SINGLE_STRINGOP)
12670 emit_insn (gen_cld ());
12672 countreg2 = gen_reg_rtx (Pmode);
12673 countreg = copy_to_mode_reg (counter_mode, count_exp);
12675 /* We don't use loops to align destination and to copy parts smaller
12676 than 4 bytes, because gcc is able to optimize such code better (in
12677 the case the destination or the count really is aligned, gcc is often
12678 able to predict the branches) and also it is friendlier to the
12679 hardware branch prediction.
12681 Using loops is beneficial for generic case, because we can
12682 handle small counts using the loops. Many CPUs (such as Athlon)
12683 have large REP prefix setup costs.
12685 This is quite costly. Maybe we can revisit this decision later or
12686 add some customizability to this code. */
12688 if (count == 0 && align < desired_alignment)
12690 label = gen_label_rtx ();
12691 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12692 LEU, 0, counter_mode, 1, label);
12696 rtx label = ix86_expand_aligntest (destreg, 1);
12697 srcmem = change_address (src, QImode, srcreg);
12698 dstmem = change_address (dst, QImode, destreg);
12699 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12700 ix86_adjust_counter (countreg, 1);
12701 emit_label (label);
12702 LABEL_NUSES (label) = 1;
12706 rtx label = ix86_expand_aligntest (destreg, 2);
12707 srcmem = change_address (src, HImode, srcreg);
12708 dstmem = change_address (dst, HImode, destreg);
12709 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12710 ix86_adjust_counter (countreg, 2);
12711 emit_label (label);
12712 LABEL_NUSES (label) = 1;
12714 if (align <= 4 && desired_alignment > 4)
12716 rtx label = ix86_expand_aligntest (destreg, 4);
12717 srcmem = change_address (src, SImode, srcreg);
12718 dstmem = change_address (dst, SImode, destreg);
12719 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12720 ix86_adjust_counter (countreg, 4);
12721 emit_label (label);
12722 LABEL_NUSES (label) = 1;
12725 if (label && desired_alignment > 4 && !TARGET_64BIT)
12727 emit_label (label);
12728 LABEL_NUSES (label) = 1;
12731 if (!TARGET_SINGLE_STRINGOP)
12732 emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2,
				  ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
12744 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12745 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12746 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12747 countreg2, destexp, srcexp));
12751 emit_label (label);
12752 LABEL_NUSES (label) = 1;
12754 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12756 srcmem = change_address (src, SImode, srcreg);
12757 dstmem = change_address (dst, SImode, destreg);
12758 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12760 if ((align <= 4 || count == 0) && TARGET_64BIT)
12762 rtx label = ix86_expand_aligntest (countreg, 4);
12763 srcmem = change_address (src, SImode, srcreg);
12764 dstmem = change_address (dst, SImode, destreg);
12765 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12766 emit_label (label);
12767 LABEL_NUSES (label) = 1;
12769 if (align > 2 && count != 0 && (count & 2))
12771 srcmem = change_address (src, HImode, srcreg);
12772 dstmem = change_address (dst, HImode, destreg);
12773 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12775 if (align <= 2 || count == 0)
12777 rtx label = ix86_expand_aligntest (countreg, 2);
12778 srcmem = change_address (src, HImode, srcreg);
12779 dstmem = change_address (dst, HImode, destreg);
12780 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12781 emit_label (label);
12782 LABEL_NUSES (label) = 1;
12784 if (align > 1 && count != 0 && (count & 1))
12786 srcmem = change_address (src, QImode, srcreg);
12787 dstmem = change_address (dst, QImode, destreg);
12788 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12790 if (align <= 1 || count == 0)
12792 rtx label = ix86_expand_aligntest (countreg, 1);
12793 srcmem = change_address (src, QImode, srcreg);
12794 dstmem = change_address (dst, QImode, destreg);
12795 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12796 emit_label (label);
12797 LABEL_NUSES (label) = 1;
12804 /* Expand string clear operation (bzero). Use i386 string operations when
12805 profitable. expand_movmem contains similar code. */
12807 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12809 rtx destreg, zeroreg, countreg, destexp;
12810 enum machine_mode counter_mode;
12811 HOST_WIDE_INT align = 0;
12812 unsigned HOST_WIDE_INT count = 0;
12814 if (GET_CODE (align_exp) == CONST_INT)
12815 align = INTVAL (align_exp);
12817 /* Can't use any of this if the user has appropriated esi. */
12818 if (global_regs[4])
12821 /* This simple hack avoids all inlining code and simplifies code below. */
12822 if (!TARGET_ALIGN_STRINGOPS)
12825 if (GET_CODE (count_exp) == CONST_INT)
12827 count = INTVAL (count_exp);
12828 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12831 /* Figure out proper mode for counter. For 32bits it is always SImode,
12832 for 64bits use SImode when possible, otherwise DImode.
12833 Set count to number of bytes copied when known at compile time. */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;
12841 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12842 if (destreg != XEXP (dst, 0))
12843 dst = replace_equiv_address_nv (dst, destreg);
  /* When optimizing for size emit a simple rep ; stosb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and the count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.  */
  if ((!optimize || optimize_size)
      && (count == 0
	  || ((count & 0x03)
	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12857 emit_insn (gen_cld ());
12859 countreg = ix86_zero_extend_to_Pmode (count_exp);
12860 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12861 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12862 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
12869 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12870 unsigned HOST_WIDE_INT offset = 0;
12872 emit_insn (gen_cld ());
12874 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12875 if (count & ~(size - 1))
12877 unsigned HOST_WIDE_INT repcount;
12878 unsigned int max_nonrep;
12880 repcount = count >> (size == 4 ? 2 : 3);
12882 repcount &= 0x3fffffff;
12884 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12885 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
	 bytes.  In both cases the latter seems to be faster for small
	 values of N.  */
      max_nonrep = size == 4 ? 7 : 4;
12889 if (!optimize_size)
12892 case PROCESSOR_PENTIUM4:
12893 case PROCESSOR_NOCONA:
12900 if (repcount <= max_nonrep)
12901 while (repcount-- > 0)
	    rtx mem = adjust_automodify_address_nv (dst,
						    GET_MODE (zeroreg),
						    destreg, offset);
	    emit_insn (gen_strset (destreg, mem, zeroreg));
12911 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12912 countreg = ix86_zero_extend_to_Pmode (countreg);
12913 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12914 GEN_INT (size == 4 ? 2 : 3));
12915 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12916 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12918 offset = count & ~(size - 1);
12921 if (size == 8 && (count & 0x04))
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12949 /* Compute desired alignment of the string operation. */
12950 int desired_alignment = (TARGET_PENTIUMPRO
12951 && (count == 0 || count >= (unsigned int) 260)
12952 ? 8 : UNITS_PER_WORD);
      /* In case we don't know anything about the alignment, default to
	 the library version, since it is usually equally fast and results in
	 shorter code.

	 Also emit the call when we know that the count is large and call
	 overhead will not be important.  */
12960 if (!TARGET_INLINE_ALL_STRINGOPS
12961 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12964 if (TARGET_SINGLE_STRINGOP)
12965 emit_insn (gen_cld ());
12967 countreg2 = gen_reg_rtx (Pmode);
12968 countreg = copy_to_mode_reg (counter_mode, count_exp);
12969 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12970 /* Get rid of MEM_OFFSET, it won't be accurate. */
12971 dst = change_address (dst, BLKmode, destreg);
12973 if (count == 0 && align < desired_alignment)
12975 label = gen_label_rtx ();
12976 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12977 LEU, 0, counter_mode, 1, label);
12981 rtx label = ix86_expand_aligntest (destreg, 1);
12982 emit_insn (gen_strset (destreg, dst,
12983 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12984 ix86_adjust_counter (countreg, 1);
12985 emit_label (label);
12986 LABEL_NUSES (label) = 1;
12990 rtx label = ix86_expand_aligntest (destreg, 2);
12991 emit_insn (gen_strset (destreg, dst,
12992 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12993 ix86_adjust_counter (countreg, 2);
12994 emit_label (label);
12995 LABEL_NUSES (label) = 1;
12997 if (align <= 4 && desired_alignment > 4)
12999 rtx label = ix86_expand_aligntest (destreg, 4);
13000 emit_insn (gen_strset (destreg, dst,
13002 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13004 ix86_adjust_counter (countreg, 4);
13005 emit_label (label);
13006 LABEL_NUSES (label) = 1;
13009 if (label && desired_alignment > 4 && !TARGET_64BIT)
13011 emit_label (label);
13012 LABEL_NUSES (label) = 1;
13016 if (!TARGET_SINGLE_STRINGOP)
13017 emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2,
				  ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
13029 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13030 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13034 emit_label (label);
13035 LABEL_NUSES (label) = 1;
13038 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13039 emit_insn (gen_strset (destreg, dst,
13040 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13041 if (TARGET_64BIT && (align <= 4 || count == 0))
13043 rtx label = ix86_expand_aligntest (countreg, 4);
13044 emit_insn (gen_strset (destreg, dst,
13045 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13046 emit_label (label);
13047 LABEL_NUSES (label) = 1;
13049 if (align > 2 && count != 0 && (count & 2))
13050 emit_insn (gen_strset (destreg, dst,
13051 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13052 if (align <= 2 || count == 0)
13054 rtx label = ix86_expand_aligntest (countreg, 2);
13055 emit_insn (gen_strset (destreg, dst,
13056 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13057 emit_label (label);
13058 LABEL_NUSES (label) = 1;
13060 if (align > 1 && count != 0 && (count & 1))
13061 emit_insn (gen_strset (destreg, dst,
13062 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13063 if (align <= 1 || count == 0)
13065 rtx label = ix86_expand_aligntest (countreg, 1);
13066 emit_insn (gen_strset (destreg, dst,
13067 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13068 emit_label (label);
13069 LABEL_NUSES (label) = 1;
13075 /* Expand strlen. */
13077 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13079 rtx addr, scratch1, scratch2, scratch3, scratch4;
  /* The generic case of the strlen expander is long.  Avoid its
     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
13091 scratch1 = gen_reg_rtx (Pmode);
13093 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      /* Well it seems that some optimizers do not combine a call like
	 foo (strlen (bar), strlen (bar));
	 when the move and the subtraction are done here.  They do calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll ().  But I think since &bar[strlen (bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll () this is better.  */
13104 emit_move_insn (out, addr);
13106 ix86_expand_strlensi_unroll_1 (out, src, align);
13108 /* strlensi_unroll_1 returns the address of the zero at the end of
13109 the string, like memchr(), so compute the length by subtracting
13110 the start address. */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
13119 scratch2 = gen_reg_rtx (Pmode);
13120 scratch3 = gen_reg_rtx (Pmode);
13121 scratch4 = force_reg (Pmode, constm1_rtx);
13123 emit_move_insn (scratch3, addr);
13124 eoschar = force_reg (QImode, eoschar);
13126 emit_insn (gen_cld ());
13127 src = replace_equiv_address_nv (src, scratch3);
13129 /* If .md starts supporting :P, this can be done in .md. */
13130 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13131 scratch4), UNSPEC_SCAS);
13132 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
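  /* Illustrative accounting for the scasb path above: scratch4 starts
     the count register at -1 and it is decremented once per byte
     scanned, terminator included, so afterwards scratch1 == -len - 2
     and

       out = ~scratch1 + (-1) == len

     e.g. for "ab": scratch1 == -4, ~(-4) == 3, 3 - 1 == 2.  */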
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
13159 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13163 rtx align_2_label = NULL_RTX;
13164 rtx align_3_label = NULL_RTX;
13165 rtx align_4_label = gen_label_rtx ();
13166 rtx end_0_label = gen_label_rtx ();
13168 rtx tmpreg = gen_reg_rtx (SImode);
13169 rtx scratch = gen_reg_rtx (SImode);
13173 if (GET_CODE (align_rtx) == CONST_INT)
13174 align = INTVAL (align_rtx);
13176 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13178 /* Is there a known alignment and is it less than 4? */
13181 rtx scratch1 = gen_reg_rtx (Pmode);
13182 emit_move_insn (scratch1, out);
13183 /* Is there a known alignment and is it not 2? */
13186 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13187 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13189 /* Leave just the 3 lower bits. */
13190 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13191 NULL_RTX, 0, OPTAB_WIDEN);
13193 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13194 Pmode, 1, align_4_label);
13195 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13196 Pmode, 1, align_2_label);
13197 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13198 Pmode, 1, align_3_label);
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to 4 bytes.  */
13205 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13206 NULL_RTX, 0, OPTAB_WIDEN);
13208 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13209 Pmode, 1, align_4_label);
13212 mem = change_address (src, QImode, out);
  /* Now compare the bytes.  */

  /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13217 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13218 QImode, 1, end_0_label);
13220 /* Increment the address. */
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, const1_rtx));
  else
    emit_insn (gen_addsi3 (out, out, const1_rtx));
13226 /* Not needed with an alignment of 2 */
13229 emit_label (align_2_label);
13231 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
13239 emit_label (align_3_label);
13242 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
13251 /* Generate loop to check 4 bytes at a time. It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);
13256 mem = change_address (src, SImode, out);
13257 emit_move_insn (scratch, mem);
13259 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13261 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13263 /* This formula yields a nonzero result iff one of the bytes is zero.
13264 This saves three branches inside the loop and many cycles. */
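/* Worked example: for X = 0x41004242, X - 0x01010101 = 0x3FFF4141
   and ~X = 0xBEFFBDBD; their conjunction with 0x80808080 leaves
   0x00800000 set, flagging the zero byte in X.  */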
13266 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13267 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13268 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13269 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13270 gen_int_mode (0x80808080, SImode)));
13271 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13276 rtx reg = gen_reg_rtx (SImode);
13277 rtx reg2 = gen_reg_rtx (Pmode);
13278 emit_move_insn (reg, tmpreg);
13279 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13281 /* If zero is not in the first two bytes, move two bytes forward. */
13282 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13283 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13284 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13285 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13286 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13289 /* Emit the lea manually to avoid clobbering the flags. */
13290 emit_insn (gen_rtx_SET (SImode, reg2,
13291 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13293 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13294 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13295 emit_insn (gen_rtx_SET (VOIDmode, out,
13296 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13303 rtx end_2_label = gen_label_rtx ();
13304 /* Is zero in the first two bytes? */
13306 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13307 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13308 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13309 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13310 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13312 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13313 JUMP_LABEL (tmp) = end_2_label;
13315 /* Not in the first two. Move two bytes forward. */
13316 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13318 emit_insn (gen_adddi3 (out, out, const2_rtx));
13320 emit_insn (gen_addsi3 (out, out, const2_rtx));
13322 emit_label (end_2_label);
13326 /* Avoid a branch when fixing up the byte count. */
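/* (A sketch of the trick: doubling the low byte of tmpreg moves its
   0x80 "zero byte here" bit into the carry flag, so the
   subtract-with-borrow below removes 4 when the zero was the first
   byte of the pair and 3 when it was the second.)  */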
13327 tmpreg = gen_lowpart (QImode, tmpreg);
13328 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13329 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13331 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13333 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13335 emit_label (end_0_label);
13339 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13340 rtx callarg2 ATTRIBUTE_UNUSED,
13341 rtx pop, int sibcall)
13343 rtx use = NULL, call;
13345 if (pop == const0_rtx)
13347 gcc_assert (!TARGET_64BIT || !pop);
13349 if (TARGET_MACHO && !TARGET_64BIT)
13352 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13353 fnaddr = machopic_indirect_call_target (fnaddr);
13358 /* Static functions and indirect calls don't need the pic register. */
13359 if (! TARGET_64BIT && flag_pic
13360 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13361 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13362 use_reg (&use, pic_offset_table_rtx);
13365 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13367 rtx al = gen_rtx_REG (QImode, 0);
13368 emit_move_insn (al, callarg2);
13369 use_reg (&use, al);
13372 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13374 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13375 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13377 if (sibcall && TARGET_64BIT
13378 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13381 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13382 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13383 emit_move_insn (fnaddr, addr);
13384 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13387 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13389 call = gen_rtx_SET (VOIDmode, retval, call);
13392 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13393 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13394 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13397 call = emit_call_insn (call);
13399 CALL_INSN_FUNCTION_USAGE (call) = use;
13403 /* Clear stack slot assignments remembered from previous functions.
13404 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
13407 static struct machine_function *
13408 ix86_init_machine_status (void)
13410 struct machine_function *f;
13412 f = ggc_alloc_cleared (sizeof (struct machine_function));
13413 f->use_fast_prologue_epilogue_nregs = -1;
13414 f->tls_descriptor_call_expanded_p = 0;
13419 /* Return a MEM corresponding to a stack slot with mode MODE.
13420 Allocate a new slot if necessary.
13422 The RTL for a function can have several slots available: N is
13423 which slot to use. */
13426 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13428 struct stack_local_entry *s;
13430 gcc_assert (n < MAX_386_STACK_LOCALS);
13432 for (s = ix86_stack_locals; s; s = s->next)
13433 if (s->mode == mode && s->n == n)
13436 s = (struct stack_local_entry *)
13437 ggc_alloc (sizeof (struct stack_local_entry));
13440 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13442 s->next = ix86_stack_locals;
13443 ix86_stack_locals = s;
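/* (Repeated requests for the same (MODE, N) pair therefore return the
   cached slot found by the loop above instead of allocating anew.)  */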
13447 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13449 static GTY(()) rtx ix86_tls_symbol;
13451 ix86_tls_get_addr (void)
13454 if (!ix86_tls_symbol)
13456 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13457 (TARGET_ANY_GNU_TLS
13459 ? "___tls_get_addr"
13460 : "__tls_get_addr");
13463 return ix86_tls_symbol;
13466 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13468 static GTY(()) rtx ix86_tls_module_base_symbol;
13470 ix86_tls_module_base (void)
13473 if (!ix86_tls_module_base_symbol)
13475 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13476 "_TLS_MODULE_BASE_");
13477 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13478 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13481 return ix86_tls_module_base_symbol;
13484 /* Calculate the length of the memory address in the instruction
13485 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13488 memory_address_length (rtx addr)
13490 struct ix86_address parts;
13491 rtx base, index, disp;
13495 if (GET_CODE (addr) == PRE_DEC
13496 || GET_CODE (addr) == POST_INC
13497 || GET_CODE (addr) == PRE_MODIFY
13498 || GET_CODE (addr) == POST_MODIFY)
13501 ok = ix86_decompose_address (addr, &parts);
13504 if (parts.base && GET_CODE (parts.base) == SUBREG)
13505 parts.base = SUBREG_REG (parts.base);
13506 if (parts.index && GET_CODE (parts.index) == SUBREG)
13507 parts.index = SUBREG_REG (parts.index);
13510 index = parts.index;
13515 /* Rule of thumb: - esp as the base always wants an index,
13516 - ebp as the base always wants a displacement. */
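/* Worked examples under the standard IA-32 encoding (illustrative,
   not GCC-specific): (%eax) needs no bytes beyond the modrm, 4(%eax)
   adds a disp8, (%eax,%ebx,4) adds a SIB byte, and (%esp) or 0(%ebp)
   hit the special cases handled below.  */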
13518 /* Register Indirect. */
13519 if (base && !index && !disp)
13521 /* esp (for its index) and ebp (for its displacement) need
13522 the two-byte modrm form. */
13523 if (addr == stack_pointer_rtx
13524 || addr == arg_pointer_rtx
13525 || addr == frame_pointer_rtx
13526 || addr == hard_frame_pointer_rtx)
13530 /* Direct Addressing. */
13531 else if (disp && !base && !index)
13536 /* Find the length of the displacement constant. */
13539 if (base && satisfies_constraint_K (disp))
13544 /* ebp always wants a displacement. */
13545 else if (base == hard_frame_pointer_rtx)
13548 /* An index requires the two-byte modrm form.... */
13550 if (index /* ...like esp, which always wants an index. */
13551 || base == stack_pointer_rtx
13552 || base == arg_pointer_rtx
13553 || base == frame_pointer_rtx)
13560 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13561 is set, expect that the insn has an 8-bit immediate alternative. */
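/* E.g. "addl $3, %eax" can use the sign-extended 1-byte immediate
   form, while "addl $1000, %eax" needs a full 4-byte immediate;
   constraint K in the loop below is what separates the two cases.  */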
13563 ix86_attr_length_immediate_default (rtx insn, int shortform)
13567 extract_insn_cached (insn);
13568 for (i = recog_data.n_operands - 1; i >= 0; --i)
13569 if (CONSTANT_P (recog_data.operand[i]))
13572 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13576 switch (get_attr_mode (insn))
13587 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13592 fatal_insn ("unknown insn mode", insn);
13598 /* Compute default value for "length_address" attribute. */
13600 ix86_attr_length_address_default (rtx insn)
13604 if (get_attr_type (insn) == TYPE_LEA)
13606 rtx set = PATTERN (insn);
13608 if (GET_CODE (set) == PARALLEL)
13609 set = XVECEXP (set, 0, 0);
13611 gcc_assert (GET_CODE (set) == SET);
13613 return memory_address_length (SET_SRC (set));
13616 extract_insn_cached (insn);
13617 for (i = recog_data.n_operands - 1; i >= 0; --i)
13618 if (GET_CODE (recog_data.operand[i]) == MEM)
13620 return memory_address_length (XEXP (recog_data.operand[i], 0));
13626 /* Return the maximum number of instructions a CPU can issue. */
13629 ix86_issue_rate (void)
13633 case PROCESSOR_PENTIUM:
13637 case PROCESSOR_PENTIUMPRO:
13638 case PROCESSOR_PENTIUM4:
13639 case PROCESSOR_ATHLON:
13641 case PROCESSOR_NOCONA:
13642 case PROCESSOR_GENERIC32:
13643 case PROCESSOR_GENERIC64:
13651 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13652 by DEP_INSN and no other register set by DEP_INSN. */
13655 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13659 /* Simplify the test for uninteresting insns. */
13660 if (insn_type != TYPE_SETCC
13661 && insn_type != TYPE_ICMOV
13662 && insn_type != TYPE_FCMOV
13663 && insn_type != TYPE_IBR)
13666 if ((set = single_set (dep_insn)) != 0)
13668 set = SET_DEST (set);
13671 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13672 && XVECLEN (PATTERN (dep_insn), 0) == 2
13673 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13674 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13676 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13677 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13682 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13685 /* This test is true if the dependent insn reads the flags but
13686 not any other potentially set register. */
13687 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13690 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13696 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13697 address with operands set by DEP_INSN. */
13700 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13704 if (insn_type == TYPE_LEA
13707 addr = PATTERN (insn);
13709 if (GET_CODE (addr) == PARALLEL)
13710 addr = XVECEXP (addr, 0, 0);
13712 gcc_assert (GET_CODE (addr) == SET);
13714 addr = SET_SRC (addr);
13719 extract_insn_cached (insn);
13720 for (i = recog_data.n_operands - 1; i >= 0; --i)
13721 if (GET_CODE (recog_data.operand[i]) == MEM)
13723 addr = XEXP (recog_data.operand[i], 0);
13730 return modified_in_p (addr, dep_insn);
13734 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13736 enum attr_type insn_type, dep_insn_type;
13737 enum attr_memory memory;
13739 int dep_insn_code_number;
13741 /* Anti and output dependencies have zero cost on all CPUs. */
13742 if (REG_NOTE_KIND (link) != 0)
13745 dep_insn_code_number = recog_memoized (dep_insn);
13747 /* If we can't recognize the insns, we can't really do anything. */
13748 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13751 insn_type = get_attr_type (insn);
13752 dep_insn_type = get_attr_type (dep_insn);
13756 case PROCESSOR_PENTIUM:
13757 /* Address Generation Interlock adds a cycle of latency. */
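/* (E.g. an add into %eax immediately followed by a load from (%eax)
   pays this extra cycle on the Pentium.)  */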
13758 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13761 /* ??? Compares pair with jump/setcc. */
13762 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13765 /* Floating point stores require the value to be ready one cycle earlier. */
13766 if (insn_type == TYPE_FMOV
13767 && get_attr_memory (insn) == MEMORY_STORE
13768 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13772 case PROCESSOR_PENTIUMPRO:
13773 memory = get_attr_memory (insn);
13775 /* INT->FP conversion is expensive. */
13776 if (get_attr_fp_int_src (dep_insn))
13779 /* There is one cycle extra latency between an FP op and a store. */
13780 if (insn_type == TYPE_FMOV
13781 && (set = single_set (dep_insn)) != NULL_RTX
13782 && (set2 = single_set (insn)) != NULL_RTX
13783 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13784 && GET_CODE (SET_DEST (set2)) == MEM)
13787 /* Model the reorder buffer's ability to hide load latency by executing
13788 the load in parallel with the previous instruction, provided that
13789 the previous instruction is not needed to compute the address. */
13790 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13791 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13793 /* Claim moves take one cycle, as the core can issue one load
13794 at a time and the next load can start a cycle later. */
13795 if (dep_insn_type == TYPE_IMOV
13796 || dep_insn_type == TYPE_FMOV)
13804 memory = get_attr_memory (insn);
13806 /* The esp dependency is resolved before the instruction is really finished. */
13808 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13809 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13812 /* INT->FP conversion is expensive. */
13813 if (get_attr_fp_int_src (dep_insn))
13816 /* Model the reorder buffer's ability to hide load latency by executing
13817 the load in parallel with the previous instruction, provided that
13818 the previous instruction is not needed to compute the address. */
13819 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13820 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13822 /* Claim moves take one cycle, as the core can issue one load
13823 at a time and the next load can start a cycle later. */
13824 if (dep_insn_type == TYPE_IMOV
13825 || dep_insn_type == TYPE_FMOV)
13834 case PROCESSOR_ATHLON:
13836 case PROCESSOR_GENERIC32:
13837 case PROCESSOR_GENERIC64:
13838 memory = get_attr_memory (insn);
13840 /* Model the reorder buffer's ability to hide load latency by executing
13841 the load in parallel with the previous instruction, provided that
13842 the previous instruction is not needed to compute the address. */
13843 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13844 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13846 enum attr_unit unit = get_attr_unit (insn);
13849 /* Because of the difference between the length of integer and
13850 floating unit pipeline preparation stages, the memory operands
13851 for floating point are cheaper.
13853 ??? For Athlon the difference is most probably 2. */
13854 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13857 loadcost = TARGET_ATHLON ? 2 : 0;
13859 if (cost >= loadcost)
13872 /* How many alternative schedules to try. This should be as wide as the
13873 scheduling freedom in the DFA, but no wider. Making this value too
13874 large results in extra work for the scheduler. */
13877 ia32_multipass_dfa_lookahead (void)
13879 if (ix86_tune == PROCESSOR_PENTIUM)
13882 if (ix86_tune == PROCESSOR_PENTIUMPRO
13883 || ix86_tune == PROCESSOR_K6)
13891 /* Compute the alignment given to a constant that is being placed in memory.
13892 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
13894 The value of this function is used instead of that alignment to align the object. */
13898 ix86_constant_alignment (tree exp, int align)
13900 if (TREE_CODE (exp) == REAL_CST)
13902 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13904 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13907 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13908 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13909 return BITS_PER_WORD;
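/* (So, for instance, a "double" constant that would otherwise get
   32-bit alignment is raised to 64 bits above, keeping 8-byte FP
   loads of it aligned; long string constants are word-aligned,
   presumably to speed up block operations on them.)  */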
13914 /* Compute the alignment for a static variable.
13915 TYPE is the data type, and ALIGN is the alignment that
13916 the object would ordinarily have. The value of this function is used
13917 instead of that alignment to align the object. */
13920 ix86_data_alignment (tree type, int align)
13922 int max_align = optimize_size ? BITS_PER_WORD : 256;
13924 if (AGGREGATE_TYPE_P (type)
13925 && TYPE_SIZE (type)
13926 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13927 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13928 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13929 && align < max_align)
13932 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13933 to a 16-byte boundary. */
13936 if (AGGREGATE_TYPE_P (type)
13937 && TYPE_SIZE (type)
13938 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13939 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13940 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13944 if (TREE_CODE (type) == ARRAY_TYPE)
13946 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13948 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13951 else if (TREE_CODE (type) == COMPLEX_TYPE)
13954 if (TYPE_MODE (type) == DCmode && align < 64)
13956 if (TYPE_MODE (type) == XCmode && align < 128)
13959 else if ((TREE_CODE (type) == RECORD_TYPE
13960 || TREE_CODE (type) == UNION_TYPE
13961 || TREE_CODE (type) == QUAL_UNION_TYPE)
13962 && TYPE_FIELDS (type))
13964 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13966 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13969 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13970 || TREE_CODE (type) == INTEGER_TYPE)
13972 if (TYPE_MODE (type) == DFmode && align < 64)
13974 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13981 /* Compute the alignment for a local variable.
13982 TYPE is the data type, and ALIGN is the alignment that
13983 the object would ordinarily have. The value of this macro is used
13984 instead of that alignment to align the object. */
13987 ix86_local_alignment (tree type, int align)
13989 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13990 to a 16-byte boundary. */
13993 if (AGGREGATE_TYPE_P (type)
13994 && TYPE_SIZE (type)
13995 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13996 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13997 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14000 if (TREE_CODE (type) == ARRAY_TYPE)
14002 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14004 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14007 else if (TREE_CODE (type) == COMPLEX_TYPE)
14009 if (TYPE_MODE (type) == DCmode && align < 64)
14011 if (TYPE_MODE (type) == XCmode && align < 128)
14014 else if ((TREE_CODE (type) == RECORD_TYPE
14015 || TREE_CODE (type) == UNION_TYPE
14016 || TREE_CODE (type) == QUAL_UNION_TYPE)
14017 && TYPE_FIELDS (type))
14019 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14021 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14024 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14025 || TREE_CODE (type) == INTEGER_TYPE)
14028 if (TYPE_MODE (type) == DFmode && align < 64)
14030 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14036 /* Emit RTL insns to initialize the variable parts of a trampoline.
14037 FNADDR is an RTX for the address of the function's pure code.
14038 CXT is an RTX for the static chain value for the function. */
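/* For orientation, a sketch of the bytes assembled below.  The
   !TARGET_64BIT trampoline is 10 bytes:

	offset 0:  b9 <cxt:4>	movl $CXT, %ecx
	offset 5:  e9 <disp:4>	jmp  FNADDR

   with the jmp displacement taken relative to tramp + 10.  The 64-bit
   variant instead loads FNADDR into r11 (movl or movabs), CXT into
   r10 with movabs, and finishes with an indirect jmp *%r11.  */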
14040 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14044 /* Compute offset from the end of the jmp to the target function. */
14045 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14046 plus_constant (tramp, 10),
14047 NULL_RTX, 1, OPTAB_DIRECT);
14048 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14049 gen_int_mode (0xb9, QImode));
14050 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14051 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14052 gen_int_mode (0xe9, QImode));
14053 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14058 /* Try to load address using shorter movl instead of movabs.
14059 We may want to support movq for kernel mode, but the kernel does not use
14060 trampolines at the moment. */
14061 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14063 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14064 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14065 gen_int_mode (0xbb41, HImode));
14066 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14067 gen_lowpart (SImode, fnaddr));
14072 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14073 gen_int_mode (0xbb49, HImode));
14074 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14078 /* Load static chain using movabs to r10. */
14079 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14080 gen_int_mode (0xba49, HImode));
14081 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14084 /* Jump to r11. */
14085 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14086 gen_int_mode (0xff49, HImode));
14087 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14088 gen_int_mode (0xe3, QImode));
14090 gcc_assert (offset <= TRAMPOLINE_SIZE);
14093 #ifdef ENABLE_EXECUTE_STACK
14094 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14095 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14099 /* Codes for all the SSE/MMX builtins. */
14102 IX86_BUILTIN_ADDPS,
14103 IX86_BUILTIN_ADDSS,
14104 IX86_BUILTIN_DIVPS,
14105 IX86_BUILTIN_DIVSS,
14106 IX86_BUILTIN_MULPS,
14107 IX86_BUILTIN_MULSS,
14108 IX86_BUILTIN_SUBPS,
14109 IX86_BUILTIN_SUBSS,
14111 IX86_BUILTIN_CMPEQPS,
14112 IX86_BUILTIN_CMPLTPS,
14113 IX86_BUILTIN_CMPLEPS,
14114 IX86_BUILTIN_CMPGTPS,
14115 IX86_BUILTIN_CMPGEPS,
14116 IX86_BUILTIN_CMPNEQPS,
14117 IX86_BUILTIN_CMPNLTPS,
14118 IX86_BUILTIN_CMPNLEPS,
14119 IX86_BUILTIN_CMPNGTPS,
14120 IX86_BUILTIN_CMPNGEPS,
14121 IX86_BUILTIN_CMPORDPS,
14122 IX86_BUILTIN_CMPUNORDPS,
14123 IX86_BUILTIN_CMPEQSS,
14124 IX86_BUILTIN_CMPLTSS,
14125 IX86_BUILTIN_CMPLESS,
14126 IX86_BUILTIN_CMPNEQSS,
14127 IX86_BUILTIN_CMPNLTSS,
14128 IX86_BUILTIN_CMPNLESS,
14129 IX86_BUILTIN_CMPNGTSS,
14130 IX86_BUILTIN_CMPNGESS,
14131 IX86_BUILTIN_CMPORDSS,
14132 IX86_BUILTIN_CMPUNORDSS,
14134 IX86_BUILTIN_COMIEQSS,
14135 IX86_BUILTIN_COMILTSS,
14136 IX86_BUILTIN_COMILESS,
14137 IX86_BUILTIN_COMIGTSS,
14138 IX86_BUILTIN_COMIGESS,
14139 IX86_BUILTIN_COMINEQSS,
14140 IX86_BUILTIN_UCOMIEQSS,
14141 IX86_BUILTIN_UCOMILTSS,
14142 IX86_BUILTIN_UCOMILESS,
14143 IX86_BUILTIN_UCOMIGTSS,
14144 IX86_BUILTIN_UCOMIGESS,
14145 IX86_BUILTIN_UCOMINEQSS,
14147 IX86_BUILTIN_CVTPI2PS,
14148 IX86_BUILTIN_CVTPS2PI,
14149 IX86_BUILTIN_CVTSI2SS,
14150 IX86_BUILTIN_CVTSI642SS,
14151 IX86_BUILTIN_CVTSS2SI,
14152 IX86_BUILTIN_CVTSS2SI64,
14153 IX86_BUILTIN_CVTTPS2PI,
14154 IX86_BUILTIN_CVTTSS2SI,
14155 IX86_BUILTIN_CVTTSS2SI64,
14157 IX86_BUILTIN_MAXPS,
14158 IX86_BUILTIN_MAXSS,
14159 IX86_BUILTIN_MINPS,
14160 IX86_BUILTIN_MINSS,
14162 IX86_BUILTIN_LOADUPS,
14163 IX86_BUILTIN_STOREUPS,
14164 IX86_BUILTIN_MOVSS,
14166 IX86_BUILTIN_MOVHLPS,
14167 IX86_BUILTIN_MOVLHPS,
14168 IX86_BUILTIN_LOADHPS,
14169 IX86_BUILTIN_LOADLPS,
14170 IX86_BUILTIN_STOREHPS,
14171 IX86_BUILTIN_STORELPS,
14173 IX86_BUILTIN_MASKMOVQ,
14174 IX86_BUILTIN_MOVMSKPS,
14175 IX86_BUILTIN_PMOVMSKB,
14177 IX86_BUILTIN_MOVNTPS,
14178 IX86_BUILTIN_MOVNTQ,
14180 IX86_BUILTIN_LOADDQU,
14181 IX86_BUILTIN_STOREDQU,
14183 IX86_BUILTIN_PACKSSWB,
14184 IX86_BUILTIN_PACKSSDW,
14185 IX86_BUILTIN_PACKUSWB,
14187 IX86_BUILTIN_PADDB,
14188 IX86_BUILTIN_PADDW,
14189 IX86_BUILTIN_PADDD,
14190 IX86_BUILTIN_PADDQ,
14191 IX86_BUILTIN_PADDSB,
14192 IX86_BUILTIN_PADDSW,
14193 IX86_BUILTIN_PADDUSB,
14194 IX86_BUILTIN_PADDUSW,
14195 IX86_BUILTIN_PSUBB,
14196 IX86_BUILTIN_PSUBW,
14197 IX86_BUILTIN_PSUBD,
14198 IX86_BUILTIN_PSUBQ,
14199 IX86_BUILTIN_PSUBSB,
14200 IX86_BUILTIN_PSUBSW,
14201 IX86_BUILTIN_PSUBUSB,
14202 IX86_BUILTIN_PSUBUSW,
14205 IX86_BUILTIN_PANDN,
14209 IX86_BUILTIN_PAVGB,
14210 IX86_BUILTIN_PAVGW,
14212 IX86_BUILTIN_PCMPEQB,
14213 IX86_BUILTIN_PCMPEQW,
14214 IX86_BUILTIN_PCMPEQD,
14215 IX86_BUILTIN_PCMPGTB,
14216 IX86_BUILTIN_PCMPGTW,
14217 IX86_BUILTIN_PCMPGTD,
14219 IX86_BUILTIN_PMADDWD,
14221 IX86_BUILTIN_PMAXSW,
14222 IX86_BUILTIN_PMAXUB,
14223 IX86_BUILTIN_PMINSW,
14224 IX86_BUILTIN_PMINUB,
14226 IX86_BUILTIN_PMULHUW,
14227 IX86_BUILTIN_PMULHW,
14228 IX86_BUILTIN_PMULLW,
14230 IX86_BUILTIN_PSADBW,
14231 IX86_BUILTIN_PSHUFW,
14233 IX86_BUILTIN_PSLLW,
14234 IX86_BUILTIN_PSLLD,
14235 IX86_BUILTIN_PSLLQ,
14236 IX86_BUILTIN_PSRAW,
14237 IX86_BUILTIN_PSRAD,
14238 IX86_BUILTIN_PSRLW,
14239 IX86_BUILTIN_PSRLD,
14240 IX86_BUILTIN_PSRLQ,
14241 IX86_BUILTIN_PSLLWI,
14242 IX86_BUILTIN_PSLLDI,
14243 IX86_BUILTIN_PSLLQI,
14244 IX86_BUILTIN_PSRAWI,
14245 IX86_BUILTIN_PSRADI,
14246 IX86_BUILTIN_PSRLWI,
14247 IX86_BUILTIN_PSRLDI,
14248 IX86_BUILTIN_PSRLQI,
14250 IX86_BUILTIN_PUNPCKHBW,
14251 IX86_BUILTIN_PUNPCKHWD,
14252 IX86_BUILTIN_PUNPCKHDQ,
14253 IX86_BUILTIN_PUNPCKLBW,
14254 IX86_BUILTIN_PUNPCKLWD,
14255 IX86_BUILTIN_PUNPCKLDQ,
14257 IX86_BUILTIN_SHUFPS,
14259 IX86_BUILTIN_RCPPS,
14260 IX86_BUILTIN_RCPSS,
14261 IX86_BUILTIN_RSQRTPS,
14262 IX86_BUILTIN_RSQRTSS,
14263 IX86_BUILTIN_SQRTPS,
14264 IX86_BUILTIN_SQRTSS,
14266 IX86_BUILTIN_UNPCKHPS,
14267 IX86_BUILTIN_UNPCKLPS,
14269 IX86_BUILTIN_ANDPS,
14270 IX86_BUILTIN_ANDNPS,
14272 IX86_BUILTIN_XORPS,
14275 IX86_BUILTIN_LDMXCSR,
14276 IX86_BUILTIN_STMXCSR,
14277 IX86_BUILTIN_SFENCE,
14279 /* 3DNow! Original */
14280 IX86_BUILTIN_FEMMS,
14281 IX86_BUILTIN_PAVGUSB,
14282 IX86_BUILTIN_PF2ID,
14283 IX86_BUILTIN_PFACC,
14284 IX86_BUILTIN_PFADD,
14285 IX86_BUILTIN_PFCMPEQ,
14286 IX86_BUILTIN_PFCMPGE,
14287 IX86_BUILTIN_PFCMPGT,
14288 IX86_BUILTIN_PFMAX,
14289 IX86_BUILTIN_PFMIN,
14290 IX86_BUILTIN_PFMUL,
14291 IX86_BUILTIN_PFRCP,
14292 IX86_BUILTIN_PFRCPIT1,
14293 IX86_BUILTIN_PFRCPIT2,
14294 IX86_BUILTIN_PFRSQIT1,
14295 IX86_BUILTIN_PFRSQRT,
14296 IX86_BUILTIN_PFSUB,
14297 IX86_BUILTIN_PFSUBR,
14298 IX86_BUILTIN_PI2FD,
14299 IX86_BUILTIN_PMULHRW,
14301 /* 3DNow! Athlon Extensions */
14302 IX86_BUILTIN_PF2IW,
14303 IX86_BUILTIN_PFNACC,
14304 IX86_BUILTIN_PFPNACC,
14305 IX86_BUILTIN_PI2FW,
14306 IX86_BUILTIN_PSWAPDSI,
14307 IX86_BUILTIN_PSWAPDSF,
14310 IX86_BUILTIN_ADDPD,
14311 IX86_BUILTIN_ADDSD,
14312 IX86_BUILTIN_DIVPD,
14313 IX86_BUILTIN_DIVSD,
14314 IX86_BUILTIN_MULPD,
14315 IX86_BUILTIN_MULSD,
14316 IX86_BUILTIN_SUBPD,
14317 IX86_BUILTIN_SUBSD,
14319 IX86_BUILTIN_CMPEQPD,
14320 IX86_BUILTIN_CMPLTPD,
14321 IX86_BUILTIN_CMPLEPD,
14322 IX86_BUILTIN_CMPGTPD,
14323 IX86_BUILTIN_CMPGEPD,
14324 IX86_BUILTIN_CMPNEQPD,
14325 IX86_BUILTIN_CMPNLTPD,
14326 IX86_BUILTIN_CMPNLEPD,
14327 IX86_BUILTIN_CMPNGTPD,
14328 IX86_BUILTIN_CMPNGEPD,
14329 IX86_BUILTIN_CMPORDPD,
14330 IX86_BUILTIN_CMPUNORDPD,
14331 IX86_BUILTIN_CMPNEPD,
14332 IX86_BUILTIN_CMPEQSD,
14333 IX86_BUILTIN_CMPLTSD,
14334 IX86_BUILTIN_CMPLESD,
14335 IX86_BUILTIN_CMPNEQSD,
14336 IX86_BUILTIN_CMPNLTSD,
14337 IX86_BUILTIN_CMPNLESD,
14338 IX86_BUILTIN_CMPORDSD,
14339 IX86_BUILTIN_CMPUNORDSD,
14340 IX86_BUILTIN_CMPNESD,
14342 IX86_BUILTIN_COMIEQSD,
14343 IX86_BUILTIN_COMILTSD,
14344 IX86_BUILTIN_COMILESD,
14345 IX86_BUILTIN_COMIGTSD,
14346 IX86_BUILTIN_COMIGESD,
14347 IX86_BUILTIN_COMINEQSD,
14348 IX86_BUILTIN_UCOMIEQSD,
14349 IX86_BUILTIN_UCOMILTSD,
14350 IX86_BUILTIN_UCOMILESD,
14351 IX86_BUILTIN_UCOMIGTSD,
14352 IX86_BUILTIN_UCOMIGESD,
14353 IX86_BUILTIN_UCOMINEQSD,
14355 IX86_BUILTIN_MAXPD,
14356 IX86_BUILTIN_MAXSD,
14357 IX86_BUILTIN_MINPD,
14358 IX86_BUILTIN_MINSD,
14360 IX86_BUILTIN_ANDPD,
14361 IX86_BUILTIN_ANDNPD,
14363 IX86_BUILTIN_XORPD,
14365 IX86_BUILTIN_SQRTPD,
14366 IX86_BUILTIN_SQRTSD,
14368 IX86_BUILTIN_UNPCKHPD,
14369 IX86_BUILTIN_UNPCKLPD,
14371 IX86_BUILTIN_SHUFPD,
14373 IX86_BUILTIN_LOADUPD,
14374 IX86_BUILTIN_STOREUPD,
14375 IX86_BUILTIN_MOVSD,
14377 IX86_BUILTIN_LOADHPD,
14378 IX86_BUILTIN_LOADLPD,
14380 IX86_BUILTIN_CVTDQ2PD,
14381 IX86_BUILTIN_CVTDQ2PS,
14383 IX86_BUILTIN_CVTPD2DQ,
14384 IX86_BUILTIN_CVTPD2PI,
14385 IX86_BUILTIN_CVTPD2PS,
14386 IX86_BUILTIN_CVTTPD2DQ,
14387 IX86_BUILTIN_CVTTPD2PI,
14389 IX86_BUILTIN_CVTPI2PD,
14390 IX86_BUILTIN_CVTSI2SD,
14391 IX86_BUILTIN_CVTSI642SD,
14393 IX86_BUILTIN_CVTSD2SI,
14394 IX86_BUILTIN_CVTSD2SI64,
14395 IX86_BUILTIN_CVTSD2SS,
14396 IX86_BUILTIN_CVTSS2SD,
14397 IX86_BUILTIN_CVTTSD2SI,
14398 IX86_BUILTIN_CVTTSD2SI64,
14400 IX86_BUILTIN_CVTPS2DQ,
14401 IX86_BUILTIN_CVTPS2PD,
14402 IX86_BUILTIN_CVTTPS2DQ,
14404 IX86_BUILTIN_MOVNTI,
14405 IX86_BUILTIN_MOVNTPD,
14406 IX86_BUILTIN_MOVNTDQ,
14409 IX86_BUILTIN_MASKMOVDQU,
14410 IX86_BUILTIN_MOVMSKPD,
14411 IX86_BUILTIN_PMOVMSKB128,
14413 IX86_BUILTIN_PACKSSWB128,
14414 IX86_BUILTIN_PACKSSDW128,
14415 IX86_BUILTIN_PACKUSWB128,
14417 IX86_BUILTIN_PADDB128,
14418 IX86_BUILTIN_PADDW128,
14419 IX86_BUILTIN_PADDD128,
14420 IX86_BUILTIN_PADDQ128,
14421 IX86_BUILTIN_PADDSB128,
14422 IX86_BUILTIN_PADDSW128,
14423 IX86_BUILTIN_PADDUSB128,
14424 IX86_BUILTIN_PADDUSW128,
14425 IX86_BUILTIN_PSUBB128,
14426 IX86_BUILTIN_PSUBW128,
14427 IX86_BUILTIN_PSUBD128,
14428 IX86_BUILTIN_PSUBQ128,
14429 IX86_BUILTIN_PSUBSB128,
14430 IX86_BUILTIN_PSUBSW128,
14431 IX86_BUILTIN_PSUBUSB128,
14432 IX86_BUILTIN_PSUBUSW128,
14434 IX86_BUILTIN_PAND128,
14435 IX86_BUILTIN_PANDN128,
14436 IX86_BUILTIN_POR128,
14437 IX86_BUILTIN_PXOR128,
14439 IX86_BUILTIN_PAVGB128,
14440 IX86_BUILTIN_PAVGW128,
14442 IX86_BUILTIN_PCMPEQB128,
14443 IX86_BUILTIN_PCMPEQW128,
14444 IX86_BUILTIN_PCMPEQD128,
14445 IX86_BUILTIN_PCMPGTB128,
14446 IX86_BUILTIN_PCMPGTW128,
14447 IX86_BUILTIN_PCMPGTD128,
14449 IX86_BUILTIN_PMADDWD128,
14451 IX86_BUILTIN_PMAXSW128,
14452 IX86_BUILTIN_PMAXUB128,
14453 IX86_BUILTIN_PMINSW128,
14454 IX86_BUILTIN_PMINUB128,
14456 IX86_BUILTIN_PMULUDQ,
14457 IX86_BUILTIN_PMULUDQ128,
14458 IX86_BUILTIN_PMULHUW128,
14459 IX86_BUILTIN_PMULHW128,
14460 IX86_BUILTIN_PMULLW128,
14462 IX86_BUILTIN_PSADBW128,
14463 IX86_BUILTIN_PSHUFHW,
14464 IX86_BUILTIN_PSHUFLW,
14465 IX86_BUILTIN_PSHUFD,
14467 IX86_BUILTIN_PSLLW128,
14468 IX86_BUILTIN_PSLLD128,
14469 IX86_BUILTIN_PSLLQ128,
14470 IX86_BUILTIN_PSRAW128,
14471 IX86_BUILTIN_PSRAD128,
14472 IX86_BUILTIN_PSRLW128,
14473 IX86_BUILTIN_PSRLD128,
14474 IX86_BUILTIN_PSRLQ128,
14475 IX86_BUILTIN_PSLLDQI128,
14476 IX86_BUILTIN_PSLLWI128,
14477 IX86_BUILTIN_PSLLDI128,
14478 IX86_BUILTIN_PSLLQI128,
14479 IX86_BUILTIN_PSRAWI128,
14480 IX86_BUILTIN_PSRADI128,
14481 IX86_BUILTIN_PSRLDQI128,
14482 IX86_BUILTIN_PSRLWI128,
14483 IX86_BUILTIN_PSRLDI128,
14484 IX86_BUILTIN_PSRLQI128,
14486 IX86_BUILTIN_PUNPCKHBW128,
14487 IX86_BUILTIN_PUNPCKHWD128,
14488 IX86_BUILTIN_PUNPCKHDQ128,
14489 IX86_BUILTIN_PUNPCKHQDQ128,
14490 IX86_BUILTIN_PUNPCKLBW128,
14491 IX86_BUILTIN_PUNPCKLWD128,
14492 IX86_BUILTIN_PUNPCKLDQ128,
14493 IX86_BUILTIN_PUNPCKLQDQ128,
14495 IX86_BUILTIN_CLFLUSH,
14496 IX86_BUILTIN_MFENCE,
14497 IX86_BUILTIN_LFENCE,
14499 /* Prescott New Instructions. */
14500 IX86_BUILTIN_ADDSUBPS,
14501 IX86_BUILTIN_HADDPS,
14502 IX86_BUILTIN_HSUBPS,
14503 IX86_BUILTIN_MOVSHDUP,
14504 IX86_BUILTIN_MOVSLDUP,
14505 IX86_BUILTIN_ADDSUBPD,
14506 IX86_BUILTIN_HADDPD,
14507 IX86_BUILTIN_HSUBPD,
14508 IX86_BUILTIN_LDDQU,
14510 IX86_BUILTIN_MONITOR,
14511 IX86_BUILTIN_MWAIT,
14513 IX86_BUILTIN_VEC_INIT_V2SI,
14514 IX86_BUILTIN_VEC_INIT_V4HI,
14515 IX86_BUILTIN_VEC_INIT_V8QI,
14516 IX86_BUILTIN_VEC_EXT_V2DF,
14517 IX86_BUILTIN_VEC_EXT_V2DI,
14518 IX86_BUILTIN_VEC_EXT_V4SF,
14519 IX86_BUILTIN_VEC_EXT_V4SI,
14520 IX86_BUILTIN_VEC_EXT_V8HI,
14521 IX86_BUILTIN_VEC_EXT_V2SI,
14522 IX86_BUILTIN_VEC_EXT_V4HI,
14523 IX86_BUILTIN_VEC_SET_V8HI,
14524 IX86_BUILTIN_VEC_SET_V4HI,
14529 #define def_builtin(MASK, NAME, TYPE, CODE) \
14531 if ((MASK) & target_flags \
14532 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14533 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14534 NULL, NULL_TREE); \
14537 /* Bits for builtin_description.flag. */
14539 /* Set when we don't support the comparison natively, and should
14540 swap_comparison in order to support it. */
14541 #define BUILTIN_DESC_SWAP_OPERANDS 1
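/* For example, __builtin_ia32_cmpgtps in bdesc_2arg below is listed
   as LT with this flag set: a > b is emitted as b < a, since SSE
   encodes only the LT/LE forms of the ordered comparisons.  */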
14543 struct builtin_description
14545 const unsigned int mask;
14546 const enum insn_code icode;
14547 const char *const name;
14548 const enum ix86_builtins code;
14549 const enum rtx_code comparison;
14550 const unsigned int flag;
14553 static const struct builtin_description bdesc_comi[] =
14555 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14556 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14557 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14558 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14559 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14560 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14561 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14562 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14563 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14564 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14565 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14566 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14567 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14568 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14569 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14570 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14571 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14572 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14573 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14574 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14575 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14576 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14577 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14578 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14581 static const struct builtin_description bdesc_2arg[] =
14584 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14585 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14586 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14587 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14588 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14589 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14590 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14591 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14593 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14594 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14595 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14596 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14597 BUILTIN_DESC_SWAP_OPERANDS },
14598 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14599 BUILTIN_DESC_SWAP_OPERANDS },
14600 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14601 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14602 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14603 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14604 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14605 BUILTIN_DESC_SWAP_OPERANDS },
14606 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14607 BUILTIN_DESC_SWAP_OPERANDS },
14608 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14609 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14610 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14611 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14612 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14613 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14614 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14615 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14616 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14617 BUILTIN_DESC_SWAP_OPERANDS },
14618 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14619 BUILTIN_DESC_SWAP_OPERANDS },
14620 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14622 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14623 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14624 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14625 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14627 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14628 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14629 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14630 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14632 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14633 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14634 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14635 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14636 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14639 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14640 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14641 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14642 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14643 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14644 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14645 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14646 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14648 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14649 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14650 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14651 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14652 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14653 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14654 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14655 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14657 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14658 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14659 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14661 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14662 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14663 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14664 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14666 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14667 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14669 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14670 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14671 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14672 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14673 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14674 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14676 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14677 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14678 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14679 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14681 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14682 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14683 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14684 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14685 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14686 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14689 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14690 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14691 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14693 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14694 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14695 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14697 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14698 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14699 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14700 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14701 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14702 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14704 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14705 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14706 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14707 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14708 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14709 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14711 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14712 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14713 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14714 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14716 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14717 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14720 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14721 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14722 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14723 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14724 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14725 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14726 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14727 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14729 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14730 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14731 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14732 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14733 BUILTIN_DESC_SWAP_OPERANDS },
14734 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14735 BUILTIN_DESC_SWAP_OPERANDS },
14736 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14737 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14738 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14739 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14740 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14741 BUILTIN_DESC_SWAP_OPERANDS },
14742 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14743 BUILTIN_DESC_SWAP_OPERANDS },
14744 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14745 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14746 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14747 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14748 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14749 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14750 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14751 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14752 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14754 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14755 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14756 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14757 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14759 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14760 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14761 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14762 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14764 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14765 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14766 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14769 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14770 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14771 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14772 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14773 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14774 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14775 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14776 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14778 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14779 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14780 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14781 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14782 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14783 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14784 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14785 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14787 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14788 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14790 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14791 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14792 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14793 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14795 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14796 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14798 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14799 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14800 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14801 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14802 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14805 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14806 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14807 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14808 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14810 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14811 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14812 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14813 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14814 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14815 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14816 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14817 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14819 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14820 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14821 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14823 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14824 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14826 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14827 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14829 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14830 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14831 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14833 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14834 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14835 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14837 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14838 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14840 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14842 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14843 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14844 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14845 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14848 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14849 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14850 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14851 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14852 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14853 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }
};
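
/* Illustration (hypothetical user code, not part of GCC): each entry in
   these tables binds one builtin to one named insn pattern, so under
   -msse2 a call such as

     __v2df y = __builtin_ia32_sqrtpd (x);

   is looked up in bdesc_1arg by ix86_expand_builtin below and expanded
   through CODE_FOR_sqrtv2df2 by ix86_expand_unop_builtin.  */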

static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
			     build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
			     build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
				build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);

  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  if (TARGET_64BIT)
    {
      float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
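
/* Usage sketch (assumed mmintrin.h-style wrapper, shown only for
   illustration): the vec_init/vec_ext/vec_set builtins registered
   above are what the intrinsics headers expand to, e.g.

     __m64 _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   so the compiler, not the user, chooses how the vector is built.  */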

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
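
/* Worked example (for illustration; entry taken from bdesc_2arg): for
   __builtin_ia32_paddw128 the table supplies CODE_FOR_addv8hi3, so
   tmode == mode0 == mode1 == V8HImode, both operands are forced into
   SSE registers when the predicates reject them, and the emitted
   pattern is simply

     (set (reg:V8HI target) (plus:V8HI (reg:V8HI op0) (reg:V8HI op1)))

   The SImode/TImode special case above handles builtins whose last
   operand is a shift count that must first be loaded with
   sse2_loadd.  */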

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
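
/* Example: for a V4SF vector TYPE_VECTOR_SUBPARTS is 4, so a call like
   __builtin_ia32_vec_ext_v4sf (x, 7) is diagnosed here ("range 0..3")
   and 0 is returned so that expansion can still continue.  */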

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_normal (TREE_VALUE (arglist));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
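
/* Usage sketch (assumed intrinsic wrapper, for illustration only):

     __m64 v = _mm_set_pi16 (3, 2, 1, 0);

   reaches this function as __builtin_ia32_vec_init_v4hi with four
   arguments; each one is narrowed to HImode by gen_lowpart and the
   whole set is handed to ix86_expand_vector_init as a PARALLEL.  */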

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
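
/* Usage sketch (assumed emmintrin.h-style wrapper, for illustration):

     int _mm_extract_epi16 (__m128i __A, int __N)
     {
       return __builtin_ia32_vec_ext_v8hi ((__v8hi) __A, __N);
     }

   __N must be a compile-time constant; get_element_number enforces
   both that and the 0..7 range before the extraction is expanded.  */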

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
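
/* Usage sketch (assumed emmintrin.h-style wrapper, for illustration):

     __m128i _mm_insert_epi16 (__m128i __A, int __D, int __N)
     {
       return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) __A,
                                                     __D, __N);
     }

   Note that the modified vector is returned in OP0 itself, matching
   the "return op0" above.  */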

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (TREE_VALUE (arglist));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
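
      /* These two cases back the _mm_setcsr/_mm_getcsr intrinsics
	 (assumed wrappers, for illustration):

	   unsigned int _mm_getcsr (void)
	   {
	     return __builtin_ia32_stmxcsr ();
	   }

	 LDMXCSR and STMXCSR only accept memory operands, hence the
	 bounce through the SImode stack slot obtained from
	 assign_386_stack_local.  */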

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
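
      /* Usage sketch (assumed emmintrin.h-style macro): at the RTL
	 level these whole-register shifts take a bit count, so the
	 byte-shift intrinsic scales its argument by 8, e.g.

	   #define _mm_slli_si128(A, B) \
	     ((__m128i) __builtin_ia32_pslldqi128 ((__m128i) (A), (B) * 8))

	 which is also why the shift operand must be an immediate.  */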

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return 0;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);

    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
	emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;
16321 case IX86_BUILTIN_MWAIT:
16322 arg0 = TREE_VALUE (arglist);
16323 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16324 op0 = expand_normal (arg0);
16325 op1 = expand_normal (arg1);
16327 op0 = copy_to_mode_reg (SImode, op0);
16329 op1 = copy_to_mode_reg (SImode, op1);
16330 emit_insn (gen_sse3_mwait (op0, op1));
16333 case IX86_BUILTIN_LDDQU:
16334 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16337 case IX86_BUILTIN_VEC_INIT_V2SI:
16338 case IX86_BUILTIN_VEC_INIT_V4HI:
16339 case IX86_BUILTIN_VEC_INIT_V8QI:
16340 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16342 case IX86_BUILTIN_VEC_EXT_V2DF:
16343 case IX86_BUILTIN_VEC_EXT_V2DI:
16344 case IX86_BUILTIN_VEC_EXT_V4SF:
16345 case IX86_BUILTIN_VEC_EXT_V4SI:
16346 case IX86_BUILTIN_VEC_EXT_V8HI:
16347 case IX86_BUILTIN_VEC_EXT_V2SI:
16348 case IX86_BUILTIN_VEC_EXT_V4HI:
16349 return ix86_expand_vec_ext_builtin (arglist, target);
16351 case IX86_BUILTIN_VEC_SET_V8HI:
16352 case IX86_BUILTIN_VEC_SET_V4HI:
16353 return ix86_expand_vec_set_builtin (arglist);
16359 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16360 if (d->code == fcode)
16362 /* Compares are treated specially. */
16363 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16364 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16365 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16366 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16367 return ix86_expand_sse_compare (d, arglist, target);
16369 return ix86_expand_binop_builtin (d->icode, arglist, target);
16372 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16373 if (d->code == fcode)
16374 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16376 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16377 if (d->code == fcode)
16378 return ix86_expand_sse_comi (d, arglist, target);
16380 gcc_unreachable ();
16383 /* Store OPERAND to memory after reload has completed. This means
16384 that we can't easily use assign_stack_local. */
16386 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16390 gcc_assert (reload_completed);
16391 if (TARGET_RED_ZONE)
16393 result = gen_rtx_MEM (mode,
16394 gen_rtx_PLUS (Pmode,
16396 GEN_INT (-RED_ZONE_SIZE)));
16397 emit_move_insn (result, operand);
16399 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16405 operand = gen_lowpart (DImode, operand);
16409 gen_rtx_SET (VOIDmode,
16410 gen_rtx_MEM (DImode,
16411 gen_rtx_PRE_DEC (DImode,
16412 stack_pointer_rtx)),
16416 gcc_unreachable ();
16418 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16427 split_di (&operand, 1, operands, operands + 1);
16429 gen_rtx_SET (VOIDmode,
16430 gen_rtx_MEM (SImode,
16431 gen_rtx_PRE_DEC (Pmode,
16432 stack_pointer_rtx)),
16435 gen_rtx_SET (VOIDmode,
16436 gen_rtx_MEM (SImode,
16437 gen_rtx_PRE_DEC (Pmode,
16438 stack_pointer_rtx)),
16443 /* Store HImode values as SImode. */
16444 operand = gen_lowpart (SImode, operand);
16448 gen_rtx_SET (VOIDmode,
16449 gen_rtx_MEM (GET_MODE (operand),
16450 gen_rtx_PRE_DEC (SImode,
16451 stack_pointer_rtx)),
16455 gcc_unreachable ();
16457 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16462 /* Free operand from the memory. */
16464 ix86_free_from_memory (enum machine_mode mode)
16466 if (!TARGET_RED_ZONE)
16470 if (mode == DImode || TARGET_64BIT)
16474 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16475 to a pop or add instruction if registers are available. */
16476 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16477 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
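/* For illustration (a sketch, not code from this file): on ia32 the
   insn above typically assembles to "leal 4(%esp), %esp" rather than
   "addl $4, %esp"; unlike ADD, LEA does not clobber the flags, so
   peephole2 may also place it between a compare and its conditional
   jump.  */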
16482 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16483 QImode must go into class Q_REGS.
16484 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16485 movdf to do mem-to-mem moves through integer regs. */
16487 ix86_preferred_reload_class (rtx x, enum reg_class class)
16489 enum machine_mode mode = GET_MODE (x);
16491 /* We're only allowed to return a subclass of CLASS. Many of the
16492 following checks fail for NO_REGS, so eliminate that early. */
16493 if (class == NO_REGS)
16496 /* All classes can load zeros. */
16497 if (x == CONST0_RTX (mode))
16500 /* Force constants into memory if we are loading a (nonzero) constant into
16501 an MMX or SSE register. This is because there are no MMX/SSE instructions
16502 to load from a constant. */
16504 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16507 /* Prefer SSE regs only if we can use them for math. */
16508 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16509 return SSE_CLASS_P (class) ? class : NO_REGS;
16511 /* Floating-point constants need more complex checks. */
16512 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16514 /* General regs can load everything. */
16515 if (reg_class_subset_p (class, GENERAL_REGS))
16518 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16519 zero above. We only want to wind up preferring 80387 registers if
16520 we plan on doing computation with them. */
16522 && standard_80387_constant_p (x))
16524 /* Limit class to non-sse. */
16525 if (class == FLOAT_SSE_REGS)
16527 if (class == FP_TOP_SSE_REGS)
16529 if (class == FP_SECOND_SSE_REGS)
16530 return FP_SECOND_REG;
16531 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16538 /* Generally when we see PLUS here, it's the function invariant
16539 (plus soft-fp const_int), which can only be computed into general
16541 if (GET_CODE (x) == PLUS)
16542 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16544 /* QImode constants are easy to load, but non-constant QImode data
16545 must go into Q_REGS. */
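/* Example of the rule above (a sketch): on ia32 only
   %eax/%ebx/%ecx/%edx have byte subregisters, so a non-constant
   QImode pseudo is confined to Q_REGS here, while a QImode constant
   may still be loaded into any integer register.  */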
16546 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16548 if (reg_class_subset_p (class, Q_REGS))
16550 if (reg_class_subset_p (Q_REGS, class))
16558 /* Discourage putting floating-point values in SSE registers unless
16559 SSE math is being used, and likewise for the 387 registers. */
16561 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16563 enum machine_mode mode = GET_MODE (x);
16565 /* Restrict the output reload class to the register bank that we are doing
16566 math on. If we would like not to return a subset of CLASS, reject this
16567 alternative: if reload cannot do this, it will still use its choice. */
16569 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16570 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16572 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16574 if (class == FP_TOP_SSE_REGS)
16576 else if (class == FP_SECOND_SSE_REGS)
16577 return FP_SECOND_REG;
16579 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16585 /* If we are copying between general and FP registers, we need a memory
16586 location. The same is true for SSE and MMX registers.
16588 The macro can't work reliably when one of the CLASSES is a class containing
16589 registers from multiple units (SSE, MMX, integer). We avoid this by never
16590 combining those units in a single alternative in the machine description.
16591 Ensure that this constraint holds to avoid unexpected surprises.
16593 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16594 enforce these sanity checks. */
16597 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16598 enum machine_mode mode, int strict)
16600 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16601 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16602 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16603 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16604 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16605 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16607 gcc_assert (!strict);
16611 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16614 /* ??? This is a lie. We do have moves between mmx/general, and
16615 between mmx/sse2. But by saying we need secondary memory we discourage the
16616 register allocator from using the mmx registers unless needed. */
16617 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16620 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16622 /* SSE1 doesn't have any direct moves from other classes. */
16626 /* If the target says that inter-unit moves are more expensive
16627 than moving through memory, then don't generate them. */
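/* Illustrative consequence (a sketch): with inter-unit moves
   disabled, an SImode copy between %xmm1 and %eax is reported as
   needing secondary memory, so reload emits a store to a stack slot
   followed by a load instead of a single movd.  */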
16628 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16631 /* Between SSE and general, we have moves no larger than word size. */
16632 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16635 /* ??? For the cost of one register reformat penalty, we could use
16636 the same instructions to move SFmode and DFmode data, but the
16637 relevant move patterns don't support those alternatives. */
16638 if (mode == SFmode || mode == DFmode)
16645 /* Return true if the registers in CLASS cannot represent the change from
16646 modes FROM to TO. */
16649 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16650 enum reg_class class)
16655 /* x87 registers can't do subreg at all, as all values are reformatted
16656 to extended precision. */
16657 if (MAYBE_FLOAT_CLASS_P (class))
16660 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16662 /* Vector registers do not support QI or HImode loads. If we don't
16663 disallow a change to these modes, reload will assume it's ok to
16664 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16665 the vec_dupv4hi pattern. */
16666 if (GET_MODE_SIZE (from) < 4)
16669 /* Vector registers do not support subreg with nonzero offsets, which
16670 are otherwise valid for integer registers. Since we can't see
16671 whether we have a nonzero offset from here, prohibit all
16672 nonparadoxical subregs changing size. */
16673 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16680 /* Return the cost of moving data from a register in class CLASS1 to
16681 one in class CLASS2.
16683 It is not required that the cost always equal 2 when FROM is the same as TO;
16684 on some machines it is expensive to move between registers if they are not
16685 general registers. */
16688 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16689 enum reg_class class2)
16691 /* In case we require secondary memory, compute the cost of the store
16692 followed by the load. To avoid bad register allocation choices, we
16693 need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
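/* Worked example (a sketch): if both symmetric MEMORY_MOVE_COSTs are
   4, the store+load path below charges at least 8, and a further 20
   is added when CLASS1 needs more hard registers than CLASS2.  */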
16695 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16699 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16700 MEMORY_MOVE_COST (mode, class1, 1));
16701 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16702 MEMORY_MOVE_COST (mode, class2, 1));
16704 /* In case of copying from a general purpose register we may emit multiple
16705 stores followed by a single load, causing a memory size mismatch stall.
16706 Count this as an arbitrarily high cost of 20. */
16707 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16710 /* In the case of FP/MMX moves, the registers actually overlap, and we
16711 have to switch modes in order to treat them differently. */
16712 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16713 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16719 /* Moves between the SSE/MMX and integer units are expensive. */
16720 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16721 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16722 return ix86_cost->mmxsse_to_integer;
16723 if (MAYBE_FLOAT_CLASS_P (class1))
16724 return ix86_cost->fp_move;
16725 if (MAYBE_SSE_CLASS_P (class1))
16726 return ix86_cost->sse_move;
16727 if (MAYBE_MMX_CLASS_P (class1))
16728 return ix86_cost->mmx_move;
16732 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16735 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16737 /* Flags, and only flags, can hold CCmode values. */
16738 if (CC_REGNO_P (regno))
16739 return GET_MODE_CLASS (mode) == MODE_CC;
16740 if (GET_MODE_CLASS (mode) == MODE_CC
16741 || GET_MODE_CLASS (mode) == MODE_RANDOM
16742 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16744 if (FP_REGNO_P (regno))
16745 return VALID_FP_MODE_P (mode);
16746 if (SSE_REGNO_P (regno))
16748 /* We implement the move patterns for all vector modes into and
16749 out of SSE registers, even when no operation instructions are available. */
16751 return (VALID_SSE_REG_MODE (mode)
16752 || VALID_SSE2_REG_MODE (mode)
16753 || VALID_MMX_REG_MODE (mode)
16754 || VALID_MMX_REG_MODE_3DNOW (mode));
16756 if (MMX_REGNO_P (regno))
16758 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16759 so if the register is available at all, then we can move data of
16760 the given mode into or out of it. */
16761 return (VALID_MMX_REG_MODE (mode)
16762 || VALID_MMX_REG_MODE_3DNOW (mode));
16765 if (mode == QImode)
16767 /* Take care with QImode values - they can be in non-QI regs,
16768 but then they cause partial register stalls. */
16769 if (regno < 4 || TARGET_64BIT)
16771 if (!TARGET_PARTIAL_REG_STALL)
16773 return reload_in_progress || reload_completed;
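/* Example (a sketch of the rationale): %esi is regno 4 and has no
   8-bit subregister on ia32, so before reload a QImode value is kept
   out of it whenever partial register stalls matter; once reload is
   running we accept the allocation rather than fail.  */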
16775 /* We handle both integers and floats in the general purpose registers. */
16776 else if (VALID_INT_MODE_P (mode))
16778 else if (VALID_FP_MODE_P (mode))
16780 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16781 on to use that value in smaller contexts, this can easily force a
16782 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16783 supporting DImode, allow it. */
16784 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16790 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16791 tieable integer mode. */
16794 ix86_tieable_integer_mode_p (enum machine_mode mode)
16803 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16806 return TARGET_64BIT;
16813 /* Return true if MODE1 is accessible in a register that can hold MODE2
16814 without copying. That is, all register classes that can hold MODE2
16815 can also hold MODE1. */
16818 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16820 if (mode1 == mode2)
16823 if (ix86_tieable_integer_mode_p (mode1)
16824 && ix86_tieable_integer_mode_p (mode2))
16827 /* MODE2 being XFmode implies fp stack or general regs, which means we
16828 can tie any smaller floating point modes to it. Note that we do not
16829 tie this with TFmode. */
16830 if (mode2 == XFmode)
16831 return mode1 == SFmode || mode1 == DFmode;
16833 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16834 that we can tie it with SFmode. */
16835 if (mode2 == DFmode)
16836 return mode1 == SFmode;
16838 /* If MODE2 is only appropriate for an SSE register, then tie with
16839 any other mode acceptable to SSE registers. */
16840 if (GET_MODE_SIZE (mode2) >= 8
16841 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16842 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16844 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16845 with any other mode acceptable to MMX registers. */
16846 if (GET_MODE_SIZE (mode2) == 8
16847 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16848 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16853 /* Return the cost of moving data of mode M between a
16854 register and memory. A value of 2 is the default; this cost is
16855 relative to those in `REGISTER_MOVE_COST'.
16857 If moving between registers and memory is more expensive than
16858 between two registers, you should define this macro to express the relative cost.
16861 Also model the increased moving costs of QImode registers in non Q_REGS classes. */
16865 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16867 if (FLOAT_CLASS_P (class))
16884 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16886 if (SSE_CLASS_P (class))
16889 switch (GET_MODE_SIZE (mode))
16903 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16905 if (MMX_CLASS_P (class))
16908 switch (GET_MODE_SIZE (mode))
16919 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16921 switch (GET_MODE_SIZE (mode))
16925 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16926 : ix86_cost->movzbl_load);
16928 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16929 : ix86_cost->int_store[0] + 4);
16932 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16934 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16935 if (mode == TFmode)
16937 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16938 * (((int) GET_MODE_SIZE (mode)
16939 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
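/* Worked example (a sketch): on ia32 UNITS_PER_WORD is 4 and XFmode
   is 12 bytes, so an XFmode value moved through the integer
   registers is charged 3 * int_load[2] (or int_store[2]); TFmode was
   narrowed to XFmode above and is charged identically.  */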
16943 /* Compute a (partial) cost for rtx X. Return true if the complete
16944 cost has been computed, and false if subexpressions should be
16945 scanned. In either case, *TOTAL contains the cost result. */
16948 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16950 enum machine_mode mode = GET_MODE (x);
16958 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16960 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16962 else if (flag_pic && SYMBOLIC_CONST (x)
16964 || (GET_CODE (x) != LABEL_REF
16965 && (GET_CODE (x) != SYMBOL_REF
16966 || !SYMBOL_REF_LOCAL_P (x)))))
16973 if (mode == VOIDmode)
16976 switch (standard_80387_constant_p (x))
16981 default: /* Other constants */
16986 /* Start with (MEM (SYMBOL_REF)), since that's where
16987 it'll probably end up. Add a penalty for size. */
16988 *total = (COSTS_N_INSNS (1)
16989 + (flag_pic != 0 && !TARGET_64BIT)
16990 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16996 /* The zero extension is often completely free on x86_64, so make
16997 it as cheap as possible. */
16998 if (TARGET_64BIT && mode == DImode
16999 && GET_MODE (XEXP (x, 0)) == SImode)
17001 else if (TARGET_ZERO_EXTEND_WITH_AND)
17002 *total = ix86_cost->add;
17004 *total = ix86_cost->movzx;
17008 *total = ix86_cost->movsx;
17012 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17013 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17015 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17018 *total = ix86_cost->add;
17021 if ((value == 2 || value == 3)
17022 && ix86_cost->lea <= ix86_cost->shift_const)
17024 *total = ix86_cost->lea;
17034 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17036 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17038 if (INTVAL (XEXP (x, 1)) > 32)
17039 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17041 *total = ix86_cost->shift_const * 2;
17045 if (GET_CODE (XEXP (x, 1)) == AND)
17046 *total = ix86_cost->shift_var * 2;
17048 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17053 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17054 *total = ix86_cost->shift_const;
17056 *total = ix86_cost->shift_var;
17061 if (FLOAT_MODE_P (mode))
17063 *total = ix86_cost->fmul;
17068 rtx op0 = XEXP (x, 0);
17069 rtx op1 = XEXP (x, 1);
17071 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17073 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17074 for (nbits = 0; value != 0; value &= value - 1)
17078 /* This is arbitrary. */
17081 /* Compute costs correctly for widening multiplication. */
17082 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17083 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17084 == GET_MODE_SIZE (mode))
17086 int is_mulwiden = 0;
17087 enum machine_mode inner_mode = GET_MODE (op0);
17089 if (GET_CODE (op0) == GET_CODE (op1))
17090 is_mulwiden = 1, op1 = XEXP (op1, 0);
17091 else if (GET_CODE (op1) == CONST_INT)
17093 if (GET_CODE (op0) == SIGN_EXTEND)
17094 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17097 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17101 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17104 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17105 + nbits * ix86_cost->mult_bit
17106 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17115 if (FLOAT_MODE_P (mode))
17116 *total = ix86_cost->fdiv;
17118 *total = ix86_cost->divide[MODE_INDEX (mode)];
17122 if (FLOAT_MODE_P (mode))
17123 *total = ix86_cost->fadd;
17124 else if (GET_MODE_CLASS (mode) == MODE_INT
17125 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17127 if (GET_CODE (XEXP (x, 0)) == PLUS
17128 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17129 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17130 && CONSTANT_P (XEXP (x, 1)))
17132 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17133 if (val == 2 || val == 4 || val == 8)
17135 *total = ix86_cost->lea;
17136 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17137 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17139 *total += rtx_cost (XEXP (x, 1), outer_code);
17143 else if (GET_CODE (XEXP (x, 0)) == MULT
17144 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17146 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17147 if (val == 2 || val == 4 || val == 8)
17149 *total = ix86_cost->lea;
17150 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17151 *total += rtx_cost (XEXP (x, 1), outer_code);
17155 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17157 *total = ix86_cost->lea;
17158 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17159 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17160 *total += rtx_cost (XEXP (x, 1), outer_code);
17167 if (FLOAT_MODE_P (mode))
17169 *total = ix86_cost->fadd;
17177 if (!TARGET_64BIT && mode == DImode)
17179 *total = (ix86_cost->add * 2
17180 + (rtx_cost (XEXP (x, 0), outer_code)
17181 << (GET_MODE (XEXP (x, 0)) != DImode))
17182 + (rtx_cost (XEXP (x, 1), outer_code)
17183 << (GET_MODE (XEXP (x, 1)) != DImode)));
17189 if (FLOAT_MODE_P (mode))
17191 *total = ix86_cost->fchs;
17197 if (!TARGET_64BIT && mode == DImode)
17198 *total = ix86_cost->add * 2;
17200 *total = ix86_cost->add;
17204 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17205 && XEXP (XEXP (x, 0), 1) == const1_rtx
17206 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17207 && XEXP (x, 1) == const0_rtx)
17209 /* This kind of construct is implemented using test[bwl].
17210 Treat it as if we had an AND. */
17211 *total = (ix86_cost->add
17212 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17213 + rtx_cost (const1_rtx, outer_code));
17219 if (!TARGET_SSE_MATH
17221 || (mode == DFmode && !TARGET_SSE2))
17222 /* For standard 80387 constants, raise the cost to prevent
17223 compress_float_constant() from generating a load from memory. */
17224 switch (standard_80387_constant_p (XEXP (x, 0)))
17234 *total = (x86_ext_80387_constants & TUNEMASK
17241 if (FLOAT_MODE_P (mode))
17242 *total = ix86_cost->fabs;
17246 if (FLOAT_MODE_P (mode))
17247 *total = ix86_cost->fsqrt;
17251 if (XINT (x, 1) == UNSPEC_TP)
17262 static int current_machopic_label_num;
17264 /* Given a symbol name and its associated stub, write out the
17265 definition of the stub. */
17268 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17270 unsigned int length;
17271 char *binder_name, *symbol_name, lazy_ptr_name[32];
17272 int label = ++current_machopic_label_num;
17274 /* For 64-bit we shouldn't get here. */
17275 gcc_assert (!TARGET_64BIT);
17277 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17278 symb = (*targetm.strip_name_encoding) (symb);
17280 length = strlen (stub);
17281 binder_name = alloca (length + 32);
17282 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17284 length = strlen (symb);
17285 symbol_name = alloca (length + 32);
17286 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17288 sprintf (lazy_ptr_name, "L%d$lz", label);
17291 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17293 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17295 fprintf (file, "%s:\n", stub);
17296 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17300 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17301 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17302 fprintf (file, "\tjmp\t*%%edx\n");
17305 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17307 fprintf (file, "%s:\n", binder_name);
17311 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17312 fprintf (file, "\tpushl\t%%eax\n");
17315 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17317 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17319 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17320 fprintf (file, "%s:\n", lazy_ptr_name);
17321 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17322 fprintf (file, "\t.long %s\n", binder_name);
17326 darwin_x86_file_end (void)
17328 darwin_file_end ();
17331 #endif /* TARGET_MACHO */
17333 /* Order the registers for the register allocator. */
17336 x86_order_regs_for_local_alloc (void)
17341 /* First allocate the local general purpose registers. */
17342 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17343 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17344 reg_alloc_order [pos++] = i;
17346 /* Global general purpose registers. */
17347 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17348 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17349 reg_alloc_order [pos++] = i;
17351 /* x87 registers come first in case we are doing FP math using them. */
17353 if (!TARGET_SSE_MATH)
17354 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17355 reg_alloc_order [pos++] = i;
17357 /* SSE registers. */
17358 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17359 reg_alloc_order [pos++] = i;
17360 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17361 reg_alloc_order [pos++] = i;
17363 /* x87 registers. */
17364 if (TARGET_SSE_MATH)
17365 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17366 reg_alloc_order [pos++] = i;
17368 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17369 reg_alloc_order [pos++] = i;
17371 /* Initialize the rest of the array, as we do not allocate some registers at all. */
17373 while (pos < FIRST_PSEUDO_REGISTER)
17374 reg_alloc_order [pos++] = 0;
17377 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17378 struct attribute_spec.handler. */
17380 ix86_handle_struct_attribute (tree *node, tree name,
17381 tree args ATTRIBUTE_UNUSED,
17382 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17385 if (DECL_P (*node))
17387 if (TREE_CODE (*node) == TYPE_DECL)
17388 type = &TREE_TYPE (*node);
17393 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17394 || TREE_CODE (*type) == UNION_TYPE)))
17396 warning (OPT_Wattributes, "%qs attribute ignored",
17397 IDENTIFIER_POINTER (name));
17398 *no_add_attrs = true;
17401 else if ((is_attribute_p ("ms_struct", name)
17402 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17403 || ((is_attribute_p ("gcc_struct", name)
17404 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17406 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17407 IDENTIFIER_POINTER (name));
17408 *no_add_attrs = true;
17415 ix86_ms_bitfield_layout_p (tree record_type)
17417 return (TARGET_MS_BITFIELD_LAYOUT &&
17418 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17419 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17422 /* Returns an expression indicating where the this parameter is
17423 located on entry to the FUNCTION. */
17426 x86_this_parameter (tree function)
17428 tree type = TREE_TYPE (function);
17432 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17433 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17436 if (ix86_function_regparm (type, function) > 0)
17440 parm = TYPE_ARG_TYPES (type);
17441 /* Figure out whether or not the function has a variable number of arguments. */
17443 for (; parm; parm = TREE_CHAIN (parm))
17444 if (TREE_VALUE (parm) == void_type_node)
17446 /* If not, the this parameter is in the first argument. */
17450 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17452 return gen_rtx_REG (SImode, regno);
17456 if (aggregate_value_p (TREE_TYPE (type), type))
17457 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17459 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17462 /* Determine whether x86_output_mi_thunk can succeed. */
17465 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17466 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17467 HOST_WIDE_INT vcall_offset, tree function)
17469 /* 64-bit can handle anything. */
17473 /* For 32-bit, everything's fine if we have one free register. */
17474 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17477 /* Need a free register for vcall_offset. */
17481 /* Need a free register for GOT references. */
17482 if (flag_pic && !(*targetm.binds_local_p) (function))
17485 /* Otherwise ok. */
17489 /* Output the assembler code for a thunk function. THUNK_DECL is the
17490 declaration for the thunk function itself, FUNCTION is the decl for
17491 the target function. DELTA is an immediate constant offset to be
17492 added to THIS. If VCALL_OFFSET is nonzero, the word at
17493 *(*this + vcall_offset) should be added to THIS. */
17496 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17497 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17498 HOST_WIDE_INT vcall_offset, tree function)
17501 rtx this = x86_this_parameter (function);
17504 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17505 pull it in now and let DELTA benefit. */
17508 else if (vcall_offset)
17510 /* Put the this parameter into %eax. */
17512 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17513 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17516 this_reg = NULL_RTX;
17518 /* Adjust the this parameter by a fixed constant. */
17521 xops[0] = GEN_INT (delta);
17522 xops[1] = this_reg ? this_reg : this;
17525 if (!x86_64_general_operand (xops[0], DImode))
17527 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17529 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17533 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17536 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17539 /* Adjust the this parameter by a value stored in the vtable. */
17543 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17546 int tmp_regno = 2 /* ECX */;
17547 if (lookup_attribute ("fastcall",
17548 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17549 tmp_regno = 0 /* EAX */;
17550 tmp = gen_rtx_REG (SImode, tmp_regno);
17553 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17556 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17558 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17560 /* Adjust the this parameter. */
17561 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17562 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17564 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17565 xops[0] = GEN_INT (vcall_offset);
17567 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17568 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17570 xops[1] = this_reg;
17572 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17574 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17577 /* If necessary, drop THIS back to its stack slot. */
17578 if (this_reg && this_reg != this)
17580 xops[0] = this_reg;
17582 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17585 xops[0] = XEXP (DECL_RTL (function), 0);
17588 if (!flag_pic || (*targetm.binds_local_p) (function))
17589 output_asm_insn ("jmp\t%P0", xops);
17592 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17593 tmp = gen_rtx_CONST (Pmode, tmp);
17594 tmp = gen_rtx_MEM (QImode, tmp);
17596 output_asm_insn ("jmp\t%A0", xops);
17601 if (!flag_pic || (*targetm.binds_local_p) (function))
17602 output_asm_insn ("jmp\t%P0", xops);
17607 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17608 tmp = (gen_rtx_SYMBOL_REF
17610 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17611 tmp = gen_rtx_MEM (QImode, tmp);
17613 output_asm_insn ("jmp\t%0", xops);
17616 #endif /* TARGET_MACHO */
17618 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17619 output_set_got (tmp, NULL_RTX);
17622 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17623 output_asm_insn ("jmp\t{*}%1", xops);
17629 x86_file_start (void)
17631 default_file_start ();
17633 darwin_file_start ();
17635 if (X86_FILE_START_VERSION_DIRECTIVE)
17636 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17637 if (X86_FILE_START_FLTUSED)
17638 fputs ("\t.global\t__fltused\n", asm_out_file);
17639 if (ix86_asm_dialect == ASM_INTEL)
17640 fputs ("\t.intel_syntax\n", asm_out_file);
17644 x86_field_alignment (tree field, int computed)
17646 enum machine_mode mode;
17647 tree type = TREE_TYPE (field);
17649 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17651 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17652 ? get_inner_array_type (type) : type);
17653 if (mode == DFmode || mode == DCmode
17654 || GET_MODE_CLASS (mode) == MODE_INT
17655 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17656 return MIN (32, computed);
17660 /* Output assembler code to FILE to increment profiler label # LABELNO
17661 for profiling a function entry. */
17663 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17668 #ifndef NO_PROFILE_COUNTERS
17669 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17671 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17675 #ifndef NO_PROFILE_COUNTERS
17676 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17678 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17682 #ifndef NO_PROFILE_COUNTERS
17683 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17684 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17686 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17690 #ifndef NO_PROFILE_COUNTERS
17691 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17692 PROFILE_COUNT_REGISTER);
17694 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17698 /* We don't have exact information about the insn sizes, but we may assume
17699 quite safely that we are informed about all 1 byte insns and memory
17700 address sizes. This is enough to eliminate unnecessary padding in the majority of cases. */
17704 min_insn_size (rtx insn)
17708 if (!INSN_P (insn) || !active_insn_p (insn))
17711 /* Discard alignments we've emitted and jump instructions. */
17712 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17713 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17715 if (GET_CODE (insn) == JUMP_INSN
17716 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17717 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17720 /* Important case - calls are always 5 bytes.
17721 It is common to have many calls in a row. */
17722 if (GET_CODE (insn) == CALL_INSN
17723 && symbolic_reference_mentioned_p (PATTERN (insn))
17724 && !SIBLING_CALL_P (insn))
17726 if (get_attr_length (insn) <= 1)
17729 /* For normal instructions we may rely on the sizes of addresses
17730 and the presence of a symbol to require 4 bytes of encoding.
17731 This is not the case for jumps, where references are PC relative. */
17732 if (GET_CODE (insn) != JUMP_INSN)
17734 l = get_attr_length_address (insn);
17735 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17744 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
17748 ix86_avoid_jump_misspredicts (void)
17750 rtx insn, start = get_insns ();
17751 int nbytes = 0, njumps = 0;
17754 /* Look for all minimal intervals of instructions containing 4 jumps.
17755 The intervals are bounded by START and INSN. NBYTES is the total
17756 size of instructions in the interval including INSN and not including
17757 START. When NBYTES is smaller than 16, it is possible
17758 that the end of START and INSN end up in the same 16-byte page.
17760 The smallest offset in the page INSN can start is the case where START
17761 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17762 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
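/* Worked example (a sketch): if the interval already holds three
   jumps and nbytes == 14 including a 2-byte INSN, then
   padsize == 15 - 14 + 2 == 3, which is enough to push INSN out of
   the 16-byte window shared with the earlier jumps.  */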
17764 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17767 nbytes += min_insn_size (insn);
17769 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17770 INSN_UID (insn), min_insn_size (insn));
17771 if ((GET_CODE (insn) == JUMP_INSN
17772 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17773 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17774 || GET_CODE (insn) == CALL_INSN)
17781 start = NEXT_INSN (start);
17782 if ((GET_CODE (start) == JUMP_INSN
17783 && GET_CODE (PATTERN (start)) != ADDR_VEC
17784 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17785 || GET_CODE (start) == CALL_INSN)
17786 njumps--, isjump = 1;
17789 nbytes -= min_insn_size (start);
17791 gcc_assert (njumps >= 0);
17793 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17794 INSN_UID (start), INSN_UID (insn), nbytes);
17796 if (njumps == 3 && isjump && nbytes < 16)
17798 int padsize = 15 - nbytes + min_insn_size (insn);
17801 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17802 INSN_UID (insn), padsize);
17803 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17808 /* AMD Athlon works faster
17809 when RET is not the destination of a conditional jump or directly preceded
17810 by another jump instruction. We avoid the penalty by inserting a NOP just
17811 before the RET instruction in such cases. */
17813 ix86_pad_returns (void)
17818 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17820 basic_block bb = e->src;
17821 rtx ret = BB_END (bb);
17823 bool replace = false;
17825 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17826 || !maybe_hot_bb_p (bb))
17828 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17829 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17831 if (prev && GET_CODE (prev) == CODE_LABEL)
17836 FOR_EACH_EDGE (e, ei, bb->preds)
17837 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17838 && !(e->flags & EDGE_FALLTHRU))
17843 prev = prev_active_insn (ret);
17845 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17846 || GET_CODE (prev) == CALL_INSN))
17848 /* Empty functions get a branch mispredict even when the jump destination
17849 is not visible to us. */
17850 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17855 emit_insn_before (gen_return_internal_long (), ret);
17861 /* Implement machine specific optimizations. We implement padding of returns
17862 for K8 CPUs, and a pass to avoid 4 jumps in a single 16 byte window. */
17866 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17867 ix86_pad_returns ();
17868 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17869 ix86_avoid_jump_misspredicts ();
17872 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
17875 x86_extended_QIreg_mentioned_p (rtx insn)
17878 extract_insn_cached (insn);
17879 for (i = 0; i < recog_data.n_operands; i++)
17880 if (REG_P (recog_data.operand[i])
17881 && REGNO (recog_data.operand[i]) >= 4)
17886 /* Return nonzero when P points to a register encoded via a REX prefix.
17887 Called via for_each_rtx. */
17889 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17891 unsigned int regno;
17894 regno = REGNO (*p);
17895 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17898 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
17901 x86_extended_reg_mentioned_p (rtx insn)
17903 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17906 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17907 optabs would emit if we didn't have TFmode patterns. */
17910 x86_emit_floatuns (rtx operands[2])
17912 rtx neglab, donelab, i0, i1, f0, in, out;
17913 enum machine_mode mode, inmode;
17915 inmode = GET_MODE (operands[1]);
17916 gcc_assert (inmode == SImode || inmode == DImode);
17919 in = force_reg (inmode, operands[1]);
17920 mode = GET_MODE (out);
17921 neglab = gen_label_rtx ();
17922 donelab = gen_label_rtx ();
17923 i1 = gen_reg_rtx (Pmode);
17924 f0 = gen_reg_rtx (mode);
17926 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17928 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17929 emit_jump_insn (gen_jump (donelab));
17932 emit_label (neglab);
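/* The value has its sign bit set, so converting it as a signed
   number would come out negative.  Classic fix (sketched by the
   three insns below): compute i0 = (in >> 1) | (in & 1), convert
   that, and double the result; folding the low bit into i0 keeps
   the final rounding correct.  */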
17934 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17935 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17936 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17937 expand_float (f0, i0, 0);
17938 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17940 emit_label (donelab);
17943 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17944 with all elements equal to VAR. Return true if successful. */
17947 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17948 rtx target, rtx val)
17950 enum machine_mode smode, wsmode, wvmode;
17965 val = force_reg (GET_MODE_INNER (mode), val);
17966 x = gen_rtx_VEC_DUPLICATE (mode, val);
17967 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17973 if (TARGET_SSE || TARGET_3DNOW_A)
17975 val = gen_lowpart (SImode, val);
17976 x = gen_rtx_TRUNCATE (HImode, val);
17977 x = gen_rtx_VEC_DUPLICATE (mode, x);
17978 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18000 /* Extend HImode to SImode using a paradoxical SUBREG. */
18001 tmp1 = gen_reg_rtx (SImode);
18002 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18003 /* Insert the SImode value as low element of V4SImode vector. */
18004 tmp2 = gen_reg_rtx (V4SImode);
18005 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18006 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18007 CONST0_RTX (V4SImode),
18009 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18010 /* Cast the V4SImode vector back to a V8HImode vector. */
18011 tmp1 = gen_reg_rtx (V8HImode);
18012 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18013 /* Duplicate the low short through the whole low SImode word. */
18014 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18015 /* Cast the V8HImode vector back to a V4SImode vector. */
18016 tmp2 = gen_reg_rtx (V4SImode);
18017 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18018 /* Replicate the low element of the V4SImode vector. */
18019 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18020 /* Cast the V4SImode vector back to V8HImode, and store in target. */
18021 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18032 /* Extend QImode to SImode using a paradoxical SUBREG. */
18033 tmp1 = gen_reg_rtx (SImode);
18034 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18035 /* Insert the SImode value as low element of V4SImode vector. */
18036 tmp2 = gen_reg_rtx (V4SImode);
18037 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18038 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18039 CONST0_RTX (V4SImode),
18041 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18042 /* Cast the V4SImode vector back to a V16QImode vector. */
18043 tmp1 = gen_reg_rtx (V16QImode);
18044 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18045 /* Duplicate the low byte through the whole low SImode word. */
18046 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18047 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18048 /* Cast the V16QImode vector back to a V4SImode vector. */
18049 tmp2 = gen_reg_rtx (V4SImode);
18050 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18051 /* Replicate the low element of the V4SImode vector. */
18052 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18053 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18054 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18062 /* Replicate the value once into the next wider mode and recurse. */
18063 val = convert_modes (wsmode, smode, val, true);
18064 x = expand_simple_binop (wsmode, ASHIFT, val,
18065 GEN_INT (GET_MODE_BITSIZE (smode)),
18066 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18067 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18069 x = gen_reg_rtx (wvmode);
18070 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18071 gcc_unreachable ();
18072 emit_move_insn (target, gen_lowpart (mode, x));
18080 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18081 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
18085 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18086 rtx target, rtx var, int one_var)
18088 enum machine_mode vsimode;
18104 var = force_reg (GET_MODE_INNER (mode), var);
18105 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18106 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18111 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18112 new_target = gen_reg_rtx (mode);
18114 new_target = target;
18115 var = force_reg (GET_MODE_INNER (mode), var);
18116 x = gen_rtx_VEC_DUPLICATE (mode, var);
18117 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18118 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18121 /* We need to shuffle the value to the correct position, so
18122 create a new pseudo to store the intermediate result. */
18124 /* With SSE2, we can use the integer shuffle insns. */
18125 if (mode != V4SFmode && TARGET_SSE2)
18127 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18129 GEN_INT (one_var == 1 ? 0 : 1),
18130 GEN_INT (one_var == 2 ? 0 : 1),
18131 GEN_INT (one_var == 3 ? 0 : 1)));
18132 if (target != new_target)
18133 emit_move_insn (target, new_target);
18137 /* Otherwise convert the intermediate result to V4SFmode and
18138 use the SSE1 shuffle instructions. */
18139 if (mode != V4SFmode)
18141 tmp = gen_reg_rtx (V4SFmode);
18142 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18147 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18149 GEN_INT (one_var == 1 ? 0 : 1),
18150 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18151 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18153 if (mode != V4SFmode)
18154 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18155 else if (tmp != target)
18156 emit_move_insn (target, tmp);
18158 else if (target != new_target)
18159 emit_move_insn (target, new_target);
18164 vsimode = V4SImode;
18170 vsimode = V2SImode;
18176 /* Zero extend the variable element to SImode and recurse. */
18177 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18179 x = gen_reg_rtx (vsimode);
18180 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18182 gcc_unreachable ();
18184 emit_move_insn (target, gen_lowpart (mode, x));
18192 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18193 consisting of the values in VALS. It is known that all elements
18194 except ONE_VAR are constants. Return true if successful. */
18197 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18198 rtx target, rtx vals, int one_var)
18200 rtx var = XVECEXP (vals, 0, one_var);
18201 enum machine_mode wmode;
18204 const_vec = copy_rtx (vals);
18205 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18206 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18214 /* For the two element vectors, it's just as easy to use
18215 the general case. */
18231 /* There's no way to set one QImode entry easily. Combine
18232 the variable value with its adjacent constant value, and
18233 promote to an HImode set. */
18234 x = XVECEXP (vals, 0, one_var ^ 1);
18237 var = convert_modes (HImode, QImode, var, true);
18238 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18239 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18240 x = GEN_INT (INTVAL (x) & 0xff);
18244 var = convert_modes (HImode, QImode, var, true);
18245 x = gen_int_mode (INTVAL (x) << 8, HImode);
18247 if (x != const0_rtx)
18248 var = expand_simple_binop (HImode, IOR, var, x, var,
18249 1, OPTAB_LIB_WIDEN);
18251 x = gen_reg_rtx (wmode);
18252 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18253 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18255 emit_move_insn (target, gen_lowpart (mode, x));
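/* Example of the path above (a sketch): for a V16QImode vector
   { 7, x, ... } with ONE_VAR == 1, the pair is merged as
   (x << 8) | 7 in HImode, stored by a V8HImode vec_set at index
   ONE_VAR >> 1 == 0, and the result is viewed back in V16QImode.  */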
18262 emit_move_insn (target, const_vec);
18263 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18267 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18268 all values variable, and none identical. */
18271 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18272 rtx target, rtx vals)
18274 enum machine_mode half_mode = GET_MODE_INNER (mode);
18275 rtx op0 = NULL, op1 = NULL;
18276 bool use_vec_concat = false;
18282 if (!mmx_ok && !TARGET_SSE)
18288 /* For the two element vectors, we always implement VEC_CONCAT. */
18289 op0 = XVECEXP (vals, 0, 0);
18290 op1 = XVECEXP (vals, 0, 1);
18291 use_vec_concat = true;
18295 half_mode = V2SFmode;
18298 half_mode = V2SImode;
18304 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18305 Recurse to load the two halves. */
18307 op0 = gen_reg_rtx (half_mode);
18308 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18309 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18311 op1 = gen_reg_rtx (half_mode);
18312 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18313 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18315 use_vec_concat = true;
18326 gcc_unreachable ();
18329 if (use_vec_concat)
18331 if (!register_operand (op0, half_mode))
18332 op0 = force_reg (half_mode, op0);
18333 if (!register_operand (op1, half_mode))
18334 op1 = force_reg (half_mode, op1);
18336 emit_insn (gen_rtx_SET (VOIDmode, target,
18337 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18341 int i, j, n_elts, n_words, n_elt_per_word;
18342 enum machine_mode inner_mode;
18343 rtx words[4], shift;
18345 inner_mode = GET_MODE_INNER (mode);
18346 n_elts = GET_MODE_NUNITS (mode);
18347 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18348 n_elt_per_word = n_elts / n_words;
18349 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
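/* Example (a sketch): building V8HImode on ia32 packs two HImode
   elements per 32-bit word; the loop below visits the higher-indexed
   element of each pair first, so after the shift/IOR sequence the
   lowest-indexed element sits in the low bits.  */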
18351 for (i = 0; i < n_words; ++i)
18353 rtx word = NULL_RTX;
18355 for (j = 0; j < n_elt_per_word; ++j)
18357 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18358 elt = convert_modes (word_mode, inner_mode, elt, true);
18364 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18365 word, 1, OPTAB_LIB_WIDEN);
18366 word = expand_simple_binop (word_mode, IOR, word, elt,
18367 word, 1, OPTAB_LIB_WIDEN);
18375 emit_move_insn (target, gen_lowpart (mode, words[0]));
18376 else if (n_words == 2)
18378 rtx tmp = gen_reg_rtx (mode);
18379 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18380 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18381 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18382 emit_move_insn (target, tmp);
18384 else if (n_words == 4)
18386 rtx tmp = gen_reg_rtx (V4SImode);
18387 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18388 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18389 emit_move_insn (target, gen_lowpart (mode, tmp));
18392 gcc_unreachable ();
18396 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18397 instructions unless MMX_OK is true. */
18400 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18402 enum machine_mode mode = GET_MODE (target);
18403 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18404 int n_elts = GET_MODE_NUNITS (mode);
18405 int n_var = 0, one_var = -1;
18406 bool all_same = true, all_const_zero = true;
18410 for (i = 0; i < n_elts; ++i)
18412 x = XVECEXP (vals, 0, i);
18413 if (!CONSTANT_P (x))
18414 n_var++, one_var = i;
18415 else if (x != CONST0_RTX (inner_mode))
18416 all_const_zero = false;
18417 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18421 /* Constants are best loaded from the constant pool. */
18424 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18428 /* If all values are identical, broadcast the value. */
18430 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18431 XVECEXP (vals, 0, 0)))
18434 /* Values where only one field is non-constant are best loaded from
18435 the pool and overwritten via a move later. */
18439 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18440 XVECEXP (vals, 0, one_var),
18444 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18448 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18452 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18454 enum machine_mode mode = GET_MODE (target);
18455 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18456 bool use_vec_merge = false;
18465 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18466 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18468 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18470 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18471 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18481 /* For the two element vectors, we implement a VEC_CONCAT with
18482 the extraction of the other element. */
18484 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18485 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18488 op0 = val, op1 = tmp;
18490 op0 = tmp, op1 = val;
18492 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18493 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18501 use_vec_merge = true;
18505 /* tmp = target = A B C D */
18506 tmp = copy_to_reg (target);
18507 /* target = A A B B */
18508 emit_insn (gen_sse_unpcklps (target, target, target));
18509 /* target = X A B B */
18510 ix86_expand_vector_set (false, target, val, 0);
18511 /* target = A X C D */
18512 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18513 GEN_INT (1), GEN_INT (0),
18514 GEN_INT (2+4), GEN_INT (3+4)));
18518 /* tmp = target = A B C D */
18519 tmp = copy_to_reg (target);
18520 /* tmp = X B C D */
18521 ix86_expand_vector_set (false, tmp, val, 0);
18522 /* target = A B X D */
18523 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18524 GEN_INT (0), GEN_INT (1),
18525 GEN_INT (0+4), GEN_INT (3+4)));
18529 /* tmp = target = A B C D */
18530 tmp = copy_to_reg (target);
18531 /* tmp = X B C D */
18532 ix86_expand_vector_set (false, tmp, val, 0);
18533 /* target = A B C X */
18534 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18535 GEN_INT (0), GEN_INT (1),
18536 GEN_INT (2+4), GEN_INT (0+4)));
18540 gcc_unreachable ();
18545 /* Element 0 handled by vec_merge below. */
18548 use_vec_merge = true;
18554 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18555 store into element 0, then shuffle them back. */
18559 order[0] = GEN_INT (elt);
18560 order[1] = const1_rtx;
18561 order[2] = const2_rtx;
18562 order[3] = GEN_INT (3);
18563 order[elt] = const0_rtx;
18565 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18566 order[1], order[2], order[3]));
18568 ix86_expand_vector_set (false, target, val, 0);
18570 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18571 order[1], order[2], order[3]));
18575 /* For SSE1, we have to reuse the V4SF code. */
18576 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18577 gen_lowpart (SFmode, val), elt);
18582 use_vec_merge = TARGET_SSE2;
18585 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18596 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18597 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18598 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18602 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18604 emit_move_insn (mem, target);
18606 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18607 emit_move_insn (tmp, val);
18609 emit_move_insn (target, mem);
18614 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18616 enum machine_mode mode = GET_MODE (vec);
18617 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18618 bool use_vec_extr = false;
18631 use_vec_extr = true;
18643 tmp = gen_reg_rtx (mode);
18644 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18645 GEN_INT (elt), GEN_INT (elt),
18646 GEN_INT (elt+4), GEN_INT (elt+4)));
18650 tmp = gen_reg_rtx (mode);
18651 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18655 gcc_unreachable ();
18658 use_vec_extr = true;
18673 tmp = gen_reg_rtx (mode);
18674 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18675 GEN_INT (elt), GEN_INT (elt),
18676 GEN_INT (elt), GEN_INT (elt)));
18680 tmp = gen_reg_rtx (mode);
18681 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18685 gcc_unreachable ();
18688 use_vec_extr = true;
18693 /* For SSE1, we have to reuse the V4SF code. */
18694 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18695 gen_lowpart (V4SFmode, vec), elt);
18701 use_vec_extr = TARGET_SSE2;
18704 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18709 /* ??? Could extract the appropriate HImode element and shift. */
18716 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18717 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18719 /* Let the rtl optimizers know about the zero extension performed. */
18720 if (inner_mode == HImode)
18722 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18723 target = gen_lowpart (SImode, target);
18726 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
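/* Dataflow of the reduction above, for IN = { A, B, C, D } and an add
   pattern for FN: movhlps makes tmp1 = { C, D, C, D }; the first FN
   makes tmp2 = { A+C, B+D, ... }; the shufps broadcasts element 1 so
   tmp3 = { B+D, ... }; the final FN leaves A+B+C+D in element 0 of
   DEST.  Only element 0 of the result is meaningful.  */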
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
			clobbers);
  return clobbers;
}
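/* The effect is that every inline asm on i386 behaves as if it had
   "flags", "fpsr" and "dirflag" in its clobber list, so e.g.
   asm volatile ("cld") needs no explicit clobber even though it
   changes the direction flag.  */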
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
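/* ix86_section_threshold comes from -mlarge-data-threshold (65536 by
   default).  Under -mcmodel=medium, a "static char buf[100000];" thus
   lands in .lbss and is addressed with full 64-bit relocations instead
   of 32-bit ones.  */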
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
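/* The _maybe_unordered variant matters because IEEE comparisons are not
   trichotomous: with a NaN operand both a < b and a >= b are false, so
   the reverse of LT is UNGE (">= or unordered"), not GE.  */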
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG)
	return output_387_ffreep (operands, 0);
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}
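/* The x87 register file is a stack: fld pushes a copy of the source
   onto %st(0), fst copies %st(0) into another slot, and fstp stores and
   pops.  A move whose source register dies can therefore use the
   popping form, and a dead store into %st(0) itself degenerates to
   ffreep, which just discards the top of stack.  */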
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
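/* The two branches above assemble roughly as

       fnstsw  %ax                  fnstsw  %ax
       sahf                         testb   $0x04, %ah
       jp      label                jne     label

   C2 is bit 10 of the FPU status word, i.e. bit 2 of %ah (hence the
   0x04 mask); sahf copies that bit into PF, so the UNORDERED test
   becomes a jp.  */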
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
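/* Both paths compute ln(1+x) as ln(2) * log2(...): fyl2xp1 evaluates
   y * log2(x+1) but is only specified for |x| < 1 - sqrt(2)/2
   (~0.2928932..., the constant compared against above), so outside
   that range the code falls back to fyl2x on an explicitly formed
   1+x.  */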
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
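/* These letters plug into the Itanium C++ ABI name mangling, e.g.
   "void f (long double)" mangles as _Z1fe and "void f (__float128)"
   as _Z1fg.  */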
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
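/* __stack_chk_fail_local is a hidden per-DSO alias (supplied by the C
   library or libgcc) precisely so that 32-bit PIC code can reach it
   with a direct pc-relative call, with no need to load %ebx with the
   GOT pointer first.  */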
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
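/* For example, 32-bit PIC code describes a global symbol as
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 (a pc-relative
   reference to a slot holding the address), while non-PIC small-model
   code can use plain 4-byte absolute values (DW_EH_PE_udata4).  */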
#include "gt-i386.h"