/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit: -1 means "no limit"; targets may predefine
   a real limit before this header section is reached.  The original
   listing dropped the closing #endif — restored here.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   Indices 0..3 cover QI/HI/SI/DImode; anything else falls in the
   trailing "other" slot (index 4).  The listing dropped the final
   ": 4)" arm — restored here so the macro is well formed.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   So COSTS_N_BYTES scales a byte count into the same units: one 2-byte
   add costs COSTS_N_BYTES (2) == COSTS_N_INSNS (1).  */
#define COSTS_N_BYTES(N) ((N) * 2)
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
337 struct processor_costs k6_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
390 struct processor_costs athlon_cost = {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
443 struct processor_costs k8_cost = {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
496 struct processor_costs pentium4_cost = {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
549 struct processor_costs nocona_cost = {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
601 /* Generic64 should produce code tuned for Nocona and K8. */
603 struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost however our current implementation of synth_mult results in
607 use of unnecessary temporary registers causing regression on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
651 is increased to perhaps more appropriate value of 5. */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
663 struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  One bit per PROCESSOR_* enum
   value; the m_* masks below are OR'd together to say which CPUs each
   tuning flag applies to.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
732 /* Generic instruction choice should be common subset of supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
735 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
736 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
737 generic because it is not working well with PPro base chips. */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_3dnow_a = m_ATHLON_K8;
747 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
748 /* Branch hints were put in P4 based on simulation result. But
749 after P4 was made, no performance benefit was observed with
750 branch hints. It also increases the code size. As the result,
751 icc never generates branch hints. */
752 const int x86_branch_hints = 0;
753 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
754 /* We probably ought to watch for partial register stalls on Generic32
755 compilation setting as well. However in current implementation the
756 partial register stalls are not eliminated very well - they can
757 be introduced via subregs synthesized by combine and can happen
758 in caller/callee saving sequences.
759 Because this option pays back little on PPro based chips and is in conflict
760 with partial reg. dependencies used by Athlon/P4 based chips, it is better
761 to leave it off for generic32 for now. */
762 const int x86_partial_reg_stall = m_PPRO;
763 const int x86_partial_flag_reg_stall = m_GENERIC;
/* Each x86_* tuning flag below is a bitmask of m_<CPU> processor bits;
   a set bit enables the heuristic when tuning for that processor
   (~(...) enables it for all processors except those listed). */
764 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
765 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
766 const int x86_use_mov0 = m_K6;
767 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
768 const int x86_read_modify_write = ~m_PENT;
769 const int x86_read_modify = ~(m_PENT | m_PPRO);
770 const int x86_split_long_moves = m_PPRO;
771 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
772 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
773 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
774 const int x86_qimode_math = ~(0);
775 const int x86_promote_qi_regs = 0;
776 /* On PPro this flag is meant to avoid partial register stalls. Just like
777 the x86_partial_reg_stall this option might be considered for Generic32
778 if our scheme for avoiding partial stalls was more effective. */
779 const int x86_himode_math = ~(m_PPRO);
780 const int x86_promote_hi_regs = m_PPRO;
/* Stack adjustment: whether to prefer sub/add esp over push/pop pairs. */
781 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
782 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
783 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
786 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
787 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
789 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
790 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
791 const int x86_shift1 = ~m_486;
792 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
793 /* In Generic model we have a conflict here in between PPro/Pentium4 based chips
794 that treat 128bit SSE registers as single units versus K8 based chips that
795 divide SSE registers to two 64bit halves.
796 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
797 to allow register renaming on 128bit SSE units, but usually results in one
798 extra microop on 64bit SSE units. Experimental results shows that disabling
799 this option on P4 brings over 20% SPECfp regression, while enabling it on
800 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling of FP operations. */
802 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
803 /* Set for machines where the type and dependencies are resolved on SSE
804 register parts instead of whole registers, so we may maintain just
805 lower part of scalar values in proper format leaving the upper part undefined. */
807 const int x86_sse_split_regs = m_ATHLON_K8;
808 const int x86_sse_typeless_stores = m_ATHLON_K8;
809 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
810 const int x86_use_ffreep = m_ATHLON_K8;
811 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
812 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
814 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
815 integer data in xmm registers. Which results in pretty abysmal code. */
816 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
818 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
819 /* Some CPU cores are not able to predict more than 4 branch instructions in
820 the 16 byte window. */
821 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
822 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
823 const int x86_use_bt = m_ATHLON_K8;
824 /* Compare and exchange was added for 80486. */
825 const int x86_cmpxchg = ~m_386;
826 /* Compare and exchange 8 bytes was added for pentium. */
827 const int x86_cmpxchg8b = ~(m_386 | m_486);
828 /* Compare and exchange 16 bytes was added for nocona. */
829 const int x86_cmpxchg16b = m_NOCONA;
830 /* Exchange and add was added for 80486. */
831 const int x86_xadd = ~m_386;
832 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
834 /* In case the average insn count for single function invocation is
835 lower than this constant, emit fast (but longer) prologue and epilogue. */
837 #define FAST_PROLOGUE_INSN_COUNT 20
839 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The initializer macros presumably come from i386.h (which L844-845 below
   references) — TODO confirm; each expands to a brace-enclosed name list. */
840 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
841 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
842 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
844 /* Array of the smallest class containing reg number REGNO, indexed by
845 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): this listing has elided lines (848-849, 851, 853, 856-857,
   861, 863, 867-869) — the initializer braces and several row comments are
   missing here; do not treat this fragment as compilable as-is. */
847 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
850 AREG, DREG, CREG, BREG,
852 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
854 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
855 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
858 /* flags, fpsr, dirflag, frame */
859 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
860 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
862 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
864 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
870 /* The "default" register map used in 32bit mode. */
/* Maps each gcc hard register number to its debug-info (stabs/dbx) register
   number; -1 means the register has no debug encoding.
   NOTE(review): the initializer braces (lines 873 and 881) are elided in
   this listing. */
872 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
874 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
875 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
876 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
877 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
878 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
879 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* gcc hard-register numbers of the integer argument registers in the
   x86-64 ABI order RDI, RSI, RDX, RCX, R8, R9 (per the inline comments). */
883 static int const x86_64_int_parameter_registers[6] =
885 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* gcc hard-register numbers of the integer value-return registers.
   NOTE(review): the second entry's comment said /*RDI*/ for value 1, but
   gcc regno 1 is %edx/RDX (see the parameter table above: "1 /*RDX*/",
   and the DWARF comment below: "2 for %edx (gcc regno = 1)"); corrected. */
889 static int const x86_64_int_return_registers[4] =
891 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
894 /* The "default" register map used in 64bit mode. */
/* Same role as dbx_register_map above, but with the 64-bit (DWARF) register
   numbering; note the REX registers 8-15 do have encodings here.
   NOTE(review): initializer braces are elided in this listing. */
895 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
897 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
898 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
899 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
900 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
901 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
902 8,9,10,11,12,13,14,15, /* extended integer registers */
903 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
906 /* Define the register numbers to be used in Dwarf debugging information.
907 The SVR4 reference port C compiler uses the following register numbers
908 in its Dwarf output code:
909 0 for %eax (gcc regno = 0)
910 1 for %ecx (gcc regno = 2)
911 2 for %edx (gcc regno = 1)
912 3 for %ebx (gcc regno = 3)
913 4 for %esp (gcc regno = 7)
914 5 for %ebp (gcc regno = 6)
915 6 for %esi (gcc regno = 4)
916 7 for %edi (gcc regno = 5)
917 The following three DWARF register numbers are never generated by
918 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919 believes these numbers have these meanings.
920 8 for %eip (no gcc equivalent)
921 9 for %eflags (gcc regno = 17)
922 10 for %trapno (no gcc equivalent)
923 It is not at all clear how we should number the FP stack registers
924 for the x86 architecture. If the version of SDB on x86/svr4 were
925 a bit less brain dead with respect to floating-point then we would
926 have a precedent to follow with respect to DWARF register numbers
927 for x86 FP registers, but the SDB on x86/svr4 is so completely
928 broken with respect to FP registers that it is hardly worth thinking
929 of it as something to strive for compatibility with.
930 The version of x86/svr4 SDB I have at the moment does (partially)
931 seem to believe that DWARF register number 11 is associated with
932 the x86 register %st(0), but that's about all. Higher DWARF
933 register numbers don't seem to be associated with anything in
934 particular, and even for DWARF regno 11, SDB only seems to under-
935 stand that it should say that a variable lives in %st(0) (when
936 asked via an `=' command) if we said it was in DWARF regno 11,
937 but SDB still prints garbage when asked for the value of the
938 variable in question (via a `/' command).
939 (Also note that the labels SDB prints for various FP stack regs
940 when doing an `x' command are all wrong.)
941 Note that these problems generally don't affect the native SVR4
942 C compiler because it doesn't allow the use of -O with -g and
943 because when it is *not* optimizing, it allocates a memory
944 location for each floating-point variable, and the memory
945 location is what gets described in the DWARF AT_location
946 attribute for the variable in question.
947 Regardless of the severe mental illness of the x86/svr4 SDB, we
948 do something sensible here and we use the following DWARF
949 register numbers. Note that these are all stack-top-relative numbers.
951 11 for %st(0) (gcc regno = 8)
952 12 for %st(1) (gcc regno = 9)
953 13 for %st(2) (gcc regno = 10)
954 14 for %st(3) (gcc regno = 11)
955 15 for %st(4) (gcc regno = 12)
956 16 for %st(5) (gcc regno = 13)
957 17 for %st(6) (gcc regno = 14)
958 18 for %st(7) (gcc regno = 15) */
/* SVR4/DWARF register numbering (documented in the long comment above):
   general regs, then %st(0)-%st(7) as 11-18, %eflags as 9. */
960 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
962 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
963 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
964 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
965 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
966 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
967 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
971 /* Test and compare insns in i386.md store the information needed to
972 generate branch and scc insns here. */
974 rtx ix86_compare_op0 = NULL_RTX;          /* first operand of the pending compare */
975 rtx ix86_compare_op1 = NULL_RTX;          /* second operand of the pending compare */
/* presumably the flags rtx when a compare has already been emitted —
   TODO confirm against the uses in i386.md expanders. */
976 rtx ix86_compare_emitted = NULL_RTX;
978 /* Size of the register save area. */
/* REGPARM_MAX word-sized integer-register slots plus SSE_REGPARM_MAX
   16-byte SSE-register slots, as the expression shows. */
979 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
981 /* Define the structure for the machine field in struct function. */
/* NOTE(review): lines 984-987 (the struct's opening brace and other
   members) are elided in this listing; only the intrusive `next` link of
   this singly linked list is visible here. */
983 struct stack_local_entry GTY(())
988 struct stack_local_entry *next;
991 /* Structure describing stack frame layout.
992 Stack grows downward:
998 saved frame pointer if frame_pointer_needed
999 <- HARD_FRAME_POINTER
1004 [va_arg registers] (
1005 > to_allocate <- FRAME_POINTER
/* NOTE(review): the listing elides the rest of the layout diagram, the
   comment terminator, the struct header/braces, and several members
   (lines 993-997, 1000-1003, 1006-1014, 1016, 1018-1019); the visible
   fields below are offsets/sizes used by the prologue/epilogue code. */
1015 HOST_WIDE_INT frame;
1017 int outgoing_arguments_size;
1020 HOST_WIDE_INT to_allocate;
1021 /* The offsets relative to ARG_POINTER. */
1022 HOST_WIDE_INT frame_pointer_offset;
1023 HOST_WIDE_INT hard_frame_pointer_offset;
1024 HOST_WIDE_INT stack_pointer_offset;
1026 /* When save_regs_using_mov is set, emit prologue using
1027 move instead of push instructions. */
1028 bool save_regs_using_mov;
1031 /* Code model option. */
1032 enum cmodel ix86_cmodel;
/* Assembler dialect (-masm=): AT&T by default. */
1034 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect (-mtls-dialect=): GNU by default. */
1036 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1038 /* Which unit we are generating floating point math for. */
1039 enum fpmath_unit ix86_fpmath;
1041 /* Which cpu are we scheduling for. */
1042 enum processor_type ix86_tune;
1043 /* Which instruction set architecture to use. */
1044 enum processor_type ix86_arch;
1046 /* true if sse prefetch instruction is not NOOP. */
1047 int x86_prefetch_sse;
1049 /* ix86_regparm_string as a number */
1050 static int ix86_regparm;
1052 /* -mstackrealign option */
1053 extern int ix86_force_align_arg_pointer;
1054 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1056 /* Preferred alignment for stack boundary in bits. */
1057 unsigned int ix86_preferred_stack_boundary;
1059 /* Values 1-5: see jump.c */
1060 int ix86_branch_cost;
1062 /* Variables which are this size or smaller are put in the data/bss
1063 or ldata/lbss sections. */
1065 int ix86_section_threshold = 65536;
1067 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1068 char internal_label_prefix[16];
1069 int internal_label_prefix_len;
1071 static bool ix86_handle_option (size_t, const char *, int);
1072 static void output_pic_addr_const (FILE *, rtx, int);
1073 static void put_condition_code (enum rtx_code, enum machine_mode,
1075 static const char *get_some_local_dynamic_name (void);
1076 static int get_some_local_dynamic_name_1 (rtx *, void *);
1077 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1078 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1080 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1081 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1083 static rtx get_thread_pointer (int);
1084 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1085 static void get_pc_thunk_name (char [32], unsigned int);
1086 static rtx gen_push (rtx);
1087 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1088 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1089 static struct machine_function * ix86_init_machine_status (void);
1090 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1091 static int ix86_nsaved_regs (void);
1092 static void ix86_emit_save_regs (void);
1093 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1094 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1095 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1096 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1097 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1098 static rtx ix86_expand_aligntest (rtx, int);
1099 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1100 static int ix86_issue_rate (void);
1101 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1102 static int ia32_multipass_dfa_lookahead (void);
1103 static void ix86_init_mmx_sse_builtins (void);
1104 static rtx x86_this_parameter (tree);
1105 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1106 HOST_WIDE_INT, tree);
1107 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1108 static void x86_file_start (void);
1109 static void ix86_reorg (void);
1110 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1111 static tree ix86_build_builtin_va_list (void);
1112 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1114 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1115 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1116 static bool ix86_vector_mode_supported_p (enum machine_mode);
1118 static int ix86_address_cost (rtx);
1119 static bool ix86_cannot_force_const_mem (rtx);
1120 static rtx ix86_delegitimize_address (rtx);
1122 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1124 struct builtin_description;
1125 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1127 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1129 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1130 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1131 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1132 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1133 static rtx safe_vector_operand (rtx, enum machine_mode);
1134 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1135 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1136 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1137 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_cost (enum rtx_code code);
1139 static unsigned int ix86_select_alt_pic_regnum (void);
1140 static int ix86_save_reg (unsigned int, int);
1141 static void ix86_compute_frame_layout (struct ix86_frame *);
1142 static int ix86_comp_type_attributes (tree, tree);
1143 static int ix86_function_regparm (tree, tree);
1144 const struct attribute_spec ix86_attribute_table[];
1145 static bool ix86_function_ok_for_sibcall (tree, tree);
1146 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1147 static int ix86_value_regno (enum machine_mode, tree, tree);
1148 static bool contains_128bit_aligned_vector_p (tree);
1149 static rtx ix86_struct_value_rtx (tree, int);
1150 static bool ix86_ms_bitfield_layout_p (tree);
1151 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1152 static int extended_reg_mentioned_1 (rtx *, void *);
1153 static bool ix86_rtx_costs (rtx, int, int, int *);
1154 static int min_insn_size (rtx);
1155 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1156 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1157 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1159 static void ix86_init_builtins (void);
1160 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1161 static const char *ix86_mangle_fundamental_type (tree);
1162 static tree ix86_stack_protect_fail (void);
1163 static rtx ix86_internal_arg_pointer (void);
1164 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1166 /* This function is only used on Solaris. */
1167 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1170 /* Register class used for passing given 64bit part of the argument.
1171 These represent classes as documented by the PS ABI, with the exception
1172 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1173 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1175 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1176 whenever possible (upper half does contain padding).
/* NOTE(review): this listing elides the comment terminator, the enum's
   opening brace, and most enumerators (lines 1174, 1177, 1179-1180,
   1183-1188, 1190-1191, 1195-1196, 1198); the name table below suggests
   the full enumerator list (no/integer/integerSI/sse/sseSF/sseDF/sseup/
   x87/x87up/cplx87) — confirm against the complete source. */
1178 enum x86_64_reg_class
1181 X86_64_INTEGER_CLASS,
1182 X86_64_INTEGERSI_CLASS,
1189 X86_64_COMPLEX_X87_CLASS,
/* Human-readable names, indexed by enum x86_64_reg_class, for debug dumps. */
1192 static const char * const x86_64_reg_class_name[] = {
1193 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1194 "sseup", "x87", "x87up", "cplx87", "no"
1197 #define MAX_CLASSES 4
1199 /* Table of constants used by fldpi, fldln2, etc.... */
1200 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily filled in by init_ext_80387_constants; 0 until initialized. */
1201 static bool ext_80387_constants_init = 0;
1202 static void init_ext_80387_constants (void);
1203 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1204 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1205 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1206 static section *x86_64_elf_select_section (tree decl, int reloc,
1207 unsigned HOST_WIDE_INT align)
1210 /* Initialize the GCC target structure. */
1211 #undef TARGET_ATTRIBUTE_TABLE
1212 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1213 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1214 # undef TARGET_MERGE_DECL_ATTRIBUTES
1215 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1218 #undef TARGET_COMP_TYPE_ATTRIBUTES
1219 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1221 #undef TARGET_INIT_BUILTINS
1222 #define TARGET_INIT_BUILTINS ix86_init_builtins
1223 #undef TARGET_EXPAND_BUILTIN
1224 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1226 #undef TARGET_ASM_FUNCTION_EPILOGUE
1227 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1229 #undef TARGET_ENCODE_SECTION_INFO
1230 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1231 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1233 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1236 #undef TARGET_ASM_OPEN_PAREN
1237 #define TARGET_ASM_OPEN_PAREN ""
1238 #undef TARGET_ASM_CLOSE_PAREN
1239 #define TARGET_ASM_CLOSE_PAREN ""
1241 #undef TARGET_ASM_ALIGNED_HI_OP
1242 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1243 #undef TARGET_ASM_ALIGNED_SI_OP
1244 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1246 #undef TARGET_ASM_ALIGNED_DI_OP
1247 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1250 #undef TARGET_ASM_UNALIGNED_HI_OP
1251 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1252 #undef TARGET_ASM_UNALIGNED_SI_OP
1253 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1254 #undef TARGET_ASM_UNALIGNED_DI_OP
1255 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1257 #undef TARGET_SCHED_ADJUST_COST
1258 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1259 #undef TARGET_SCHED_ISSUE_RATE
1260 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1261 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1262 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1263 ia32_multipass_dfa_lookahead
1265 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1266 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1269 #undef TARGET_HAVE_TLS
1270 #define TARGET_HAVE_TLS true
1272 #undef TARGET_CANNOT_FORCE_CONST_MEM
1273 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1274 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1275 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1277 #undef TARGET_DELEGITIMIZE_ADDRESS
1278 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1280 #undef TARGET_MS_BITFIELD_LAYOUT_P
1281 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1284 #undef TARGET_BINDS_LOCAL_P
1285 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1288 #undef TARGET_ASM_OUTPUT_MI_THUNK
1289 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1290 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1291 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1293 #undef TARGET_ASM_FILE_START
1294 #define TARGET_ASM_FILE_START x86_file_start
1296 #undef TARGET_DEFAULT_TARGET_FLAGS
1297 #define TARGET_DEFAULT_TARGET_FLAGS \
1299 | TARGET_64BIT_DEFAULT \
1300 | TARGET_SUBTARGET_DEFAULT \
1301 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1303 #undef TARGET_HANDLE_OPTION
1304 #define TARGET_HANDLE_OPTION ix86_handle_option
1306 #undef TARGET_RTX_COSTS
1307 #define TARGET_RTX_COSTS ix86_rtx_costs
1308 #undef TARGET_ADDRESS_COST
1309 #define TARGET_ADDRESS_COST ix86_address_cost
1311 #undef TARGET_FIXED_CONDITION_CODE_REGS
1312 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1313 #undef TARGET_CC_MODES_COMPATIBLE
1314 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1316 #undef TARGET_MACHINE_DEPENDENT_REORG
1317 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1319 #undef TARGET_BUILD_BUILTIN_VA_LIST
1320 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1322 #undef TARGET_MD_ASM_CLOBBERS
1323 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1325 #undef TARGET_PROMOTE_PROTOTYPES
1326 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1327 #undef TARGET_STRUCT_VALUE_RTX
1328 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1329 #undef TARGET_SETUP_INCOMING_VARARGS
1330 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1331 #undef TARGET_MUST_PASS_IN_STACK
1332 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1333 #undef TARGET_PASS_BY_REFERENCE
1334 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1335 #undef TARGET_INTERNAL_ARG_POINTER
1336 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1337 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1338 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1341 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1344 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1346 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1347 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1350 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1351 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1354 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1355 #undef TARGET_INSERT_ATTRIBUTES
1356 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1359 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1360 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1362 #undef TARGET_STACK_PROTECT_FAIL
1363 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1365 #undef TARGET_FUNCTION_VALUE
1366 #define TARGET_FUNCTION_VALUE ix86_function_value
/* Instantiate the target hook vector from the macros defined above. */
1368 struct gcc_target targetm = TARGET_INITIALIZER;
1371 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
/* NOTE(review): the matching #endif (line 1375) is elided in this listing. */
1373 #ifndef DEFAULT_PCC_STRUCT_RETURN
1374 #define DEFAULT_PCC_STRUCT_RETURN 1
1377 /* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): the return type line, opening brace, switch statement and
   case labels (lines 1378-1386, 1389-1394, 1397-1402, 1405-1410, 1413-1419)
   are elided in this listing.  The visible statements all follow one
   pattern: when an ISA option is turned off, also clear the target-flag
   masks of the features that depend on it, and record those masks as
   explicitly set so later defaulting does not re-enable them. */
1380 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1387 target_flags &= ~MASK_3DNOW_A;
1388 target_flags_explicit |= MASK_3DNOW_A;
1395 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1396 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1403 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1404 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1411 target_flags &= ~MASK_SSE3;
1412 target_flags_explicit |= MASK_SSE3;
1421 /* Sometimes certain combinations of command options do not make
1422 sense on a particular target machine. You can define a macro
1423 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1424 defined, is executed once just after all the command options have been processed.
1427 Don't use this macro to turn on various extra optimizations for
1428 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1431 override_options (void)
1434 int ix86_tune_defaulted = 0;
1436 /* Comes from final.c -- no real reason to change it. */
1437 #define MAX_CODE_ALIGN 16
1441 const struct processor_costs *cost; /* Processor costs */
1442 const int target_enable; /* Target flags to enable. */
1443 const int target_disable; /* Target flags to disable. */
1444 const int align_loop; /* Default alignments. */
1445 const int align_loop_max_skip;
1446 const int align_jump;
1447 const int align_jump_max_skip;
1448 const int align_func;
1450 const processor_target_table[PROCESSOR_max] =
1452 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1453 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1454 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1455 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1456 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1457 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1458 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1459 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1460 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1461 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1462 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1465 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1468 const char *const name; /* processor name or nickname. */
1469 const enum processor_type processor;
1470 const enum pta_flags
1476 PTA_PREFETCH_SSE = 16,
1482 const processor_alias_table[] =
1484 {"i386", PROCESSOR_I386, 0},
1485 {"i486", PROCESSOR_I486, 0},
1486 {"i586", PROCESSOR_PENTIUM, 0},
1487 {"pentium", PROCESSOR_PENTIUM, 0},
1488 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1489 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1490 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1491 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1492 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1493 {"i686", PROCESSOR_PENTIUMPRO, 0},
1494 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1495 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1496 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1497 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1498 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1499 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1500 | PTA_MMX | PTA_PREFETCH_SSE},
1501 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1502 | PTA_MMX | PTA_PREFETCH_SSE},
1503 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1504 | PTA_MMX | PTA_PREFETCH_SSE},
1505 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1506 | PTA_MMX | PTA_PREFETCH_SSE},
1507 {"k6", PROCESSOR_K6, PTA_MMX},
1508 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1509 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1510 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1512 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1513 | PTA_3DNOW | PTA_3DNOW_A},
1514 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1515 | PTA_3DNOW_A | PTA_SSE},
1516 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1517 | PTA_3DNOW_A | PTA_SSE},
1518 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1519 | PTA_3DNOW_A | PTA_SSE},
1520 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1521 | PTA_SSE | PTA_SSE2 },
1522 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1523 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1524 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1525 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1526 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1527 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1528 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1529 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1530 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1531 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1534 int const pta_size = ARRAY_SIZE (processor_alias_table);
1536 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1537 SUBTARGET_OVERRIDE_OPTIONS;
1540 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1541 SUBSUBTARGET_OVERRIDE_OPTIONS;
1544 /* -fPIC is the default for x86_64. */
1545 if (TARGET_MACHO && TARGET_64BIT)
1548 /* Set the default values for switches whose default depends on TARGET_64BIT
1549 in case they weren't overwritten by command line options. */
1552 /* Mach-O doesn't support omitting the frame pointer for now. */
1553 if (flag_omit_frame_pointer == 2)
1554 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1555 if (flag_asynchronous_unwind_tables == 2)
1556 flag_asynchronous_unwind_tables = 1;
1557 if (flag_pcc_struct_return == 2)
1558 flag_pcc_struct_return = 0;
1562 if (flag_omit_frame_pointer == 2)
1563 flag_omit_frame_pointer = 0;
1564 if (flag_asynchronous_unwind_tables == 2)
1565 flag_asynchronous_unwind_tables = 0;
1566 if (flag_pcc_struct_return == 2)
1567 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1570 /* Need to check -mtune=generic first. */
1571 if (ix86_tune_string)
1573 if (!strcmp (ix86_tune_string, "generic")
1574 || !strcmp (ix86_tune_string, "i686")
1575 /* As special support for cross compilers we read -mtune=native
1576 as -mtune=generic. With native compilers we won't see the
1577 -mtune=native, as it was changed by the driver. */
1578 || !strcmp (ix86_tune_string, "native"))
1581 ix86_tune_string = "generic64";
1583 ix86_tune_string = "generic32";
1585 else if (!strncmp (ix86_tune_string, "generic", 7))
1586 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1590 if (ix86_arch_string)
1591 ix86_tune_string = ix86_arch_string;
1592 if (!ix86_tune_string)
1594 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1595 ix86_tune_defaulted = 1;
1598 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1599 need to use a sensible tune option. */
1600 if (!strcmp (ix86_tune_string, "generic")
1601 || !strcmp (ix86_tune_string, "x86-64")
1602 || !strcmp (ix86_tune_string, "i686"))
1605 ix86_tune_string = "generic64";
1607 ix86_tune_string = "generic32";
1610 if (!strcmp (ix86_tune_string, "x86-64"))
1611 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1612 "-mtune=generic instead as appropriate.");
1614 if (!ix86_arch_string)
1615 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1616 if (!strcmp (ix86_arch_string, "generic"))
1617 error ("generic CPU can be used only for -mtune= switch");
1618 if (!strncmp (ix86_arch_string, "generic", 7))
1619 error ("bad value (%s) for -march= switch", ix86_arch_string);
1621 if (ix86_cmodel_string != 0)
1623 if (!strcmp (ix86_cmodel_string, "small"))
1624 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1625 else if (!strcmp (ix86_cmodel_string, "medium"))
1626 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1628 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1629 else if (!strcmp (ix86_cmodel_string, "32"))
1630 ix86_cmodel = CM_32;
1631 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1632 ix86_cmodel = CM_KERNEL;
1633 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1634 ix86_cmodel = CM_LARGE;
1636 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1640 ix86_cmodel = CM_32;
1642 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1644 if (ix86_asm_string != 0)
1647 && !strcmp (ix86_asm_string, "intel"))
1648 ix86_asm_dialect = ASM_INTEL;
1649 else if (!strcmp (ix86_asm_string, "att"))
1650 ix86_asm_dialect = ASM_ATT;
1652 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1654 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1655 error ("code model %qs not supported in the %s bit mode",
1656 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1657 if (ix86_cmodel == CM_LARGE)
1658 sorry ("code model %<large%> not supported yet");
1659 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1660 sorry ("%i-bit mode not compiled in",
1661 (target_flags & MASK_64BIT) ? 64 : 32);
1663 for (i = 0; i < pta_size; i++)
1664 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1666 ix86_arch = processor_alias_table[i].processor;
1667 /* Default cpu tuning to the architecture. */
1668 ix86_tune = ix86_arch;
1669 if (processor_alias_table[i].flags & PTA_MMX
1670 && !(target_flags_explicit & MASK_MMX))
1671 target_flags |= MASK_MMX;
1672 if (processor_alias_table[i].flags & PTA_3DNOW
1673 && !(target_flags_explicit & MASK_3DNOW))
1674 target_flags |= MASK_3DNOW;
1675 if (processor_alias_table[i].flags & PTA_3DNOW_A
1676 && !(target_flags_explicit & MASK_3DNOW_A))
1677 target_flags |= MASK_3DNOW_A;
1678 if (processor_alias_table[i].flags & PTA_SSE
1679 && !(target_flags_explicit & MASK_SSE))
1680 target_flags |= MASK_SSE;
1681 if (processor_alias_table[i].flags & PTA_SSE2
1682 && !(target_flags_explicit & MASK_SSE2))
1683 target_flags |= MASK_SSE2;
1684 if (processor_alias_table[i].flags & PTA_SSE3
1685 && !(target_flags_explicit & MASK_SSE3))
1686 target_flags |= MASK_SSE3;
1687 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1688 x86_prefetch_sse = true;
1689 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1690 error ("CPU you selected does not support x86-64 "
1696 error ("bad value (%s) for -march= switch", ix86_arch_string);
1698 for (i = 0; i < pta_size; i++)
1699 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1701 ix86_tune = processor_alias_table[i].processor;
1702 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1704 if (ix86_tune_defaulted)
1706 ix86_tune_string = "x86-64";
1707 for (i = 0; i < pta_size; i++)
1708 if (! strcmp (ix86_tune_string,
1709 processor_alias_table[i].name))
1711 ix86_tune = processor_alias_table[i].processor;
1714 error ("CPU you selected does not support x86-64 "
1717 /* Intel CPUs have always interpreted SSE prefetch instructions as
1718 NOPs; so, we can enable SSE prefetch instructions even when
1719 -mtune (rather than -march) points us to a processor that has them.
1720 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1721 higher processors. */
1722 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1723 x86_prefetch_sse = true;
1727 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1730 ix86_cost = &size_cost;
1732 ix86_cost = processor_target_table[ix86_tune].cost;
1733 target_flags |= processor_target_table[ix86_tune].target_enable;
1734 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1736 /* Arrange to set up i386_stack_locals for all functions. */
1737 init_machine_status = ix86_init_machine_status;
1739 /* Validate -mregparm= value. */
1740 if (ix86_regparm_string)
1742 i = atoi (ix86_regparm_string);
1743 if (i < 0 || i > REGPARM_MAX)
1744 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1750 ix86_regparm = REGPARM_MAX;
1752 /* If the user has provided any of the -malign-* options,
1753 warn and use that value only if -falign-* is not set.
1754 Remove this code in GCC 3.2 or later. */
1755 if (ix86_align_loops_string)
1757 warning (0, "-malign-loops is obsolete, use -falign-loops");
1758 if (align_loops == 0)
1760 i = atoi (ix86_align_loops_string);
1761 if (i < 0 || i > MAX_CODE_ALIGN)
1762 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1764 align_loops = 1 << i;
1768 if (ix86_align_jumps_string)
1770 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1771 if (align_jumps == 0)
1773 i = atoi (ix86_align_jumps_string);
1774 if (i < 0 || i > MAX_CODE_ALIGN)
1775 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1777 align_jumps = 1 << i;
1781 if (ix86_align_funcs_string)
1783 warning (0, "-malign-functions is obsolete, use -falign-functions");
1784 if (align_functions == 0)
1786 i = atoi (ix86_align_funcs_string);
1787 if (i < 0 || i > MAX_CODE_ALIGN)
1788 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1790 align_functions = 1 << i;
1794 /* Default align_* from the processor table. */
1795 if (align_loops == 0)
1797 align_loops = processor_target_table[ix86_tune].align_loop;
1798 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1800 if (align_jumps == 0)
1802 align_jumps = processor_target_table[ix86_tune].align_jump;
1803 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1805 if (align_functions == 0)
1807 align_functions = processor_target_table[ix86_tune].align_func;
1810 /* Validate -mbranch-cost= value, or provide default. */
1811 ix86_branch_cost = ix86_cost->branch_cost;
1812 if (ix86_branch_cost_string)
1814 i = atoi (ix86_branch_cost_string);
1816 error ("-mbranch-cost=%d is not between 0 and 5", i);
1818 ix86_branch_cost = i;
1820 if (ix86_section_threshold_string)
1822 i = atoi (ix86_section_threshold_string);
1824 error ("-mlarge-data-threshold=%d is negative", i);
1826 ix86_section_threshold = i;
1829 if (ix86_tls_dialect_string)
1831 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1832 ix86_tls_dialect = TLS_DIALECT_GNU;
1833 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1834 ix86_tls_dialect = TLS_DIALECT_GNU2;
1835 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1836 ix86_tls_dialect = TLS_DIALECT_SUN;
1838 error ("bad value (%s) for -mtls-dialect= switch",
1839 ix86_tls_dialect_string);
1842 /* Keep nonleaf frame pointers. */
1843 if (flag_omit_frame_pointer)
1844 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1845 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1846 flag_omit_frame_pointer = 1;
1848 /* If we're doing fast math, we don't care about comparison order
1849 wrt NaNs. This lets us use a shorter comparison sequence. */
1850 if (flag_unsafe_math_optimizations)
1851 target_flags &= ~MASK_IEEE_FP;
1853 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1854 since the insns won't need emulation. */
1855 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1856 target_flags &= ~MASK_NO_FANCY_MATH_387;
1858 /* Likewise, if the target doesn't have a 387, or we've specified
1859 software floating point, don't use 387 inline intrinsics. */
1861 target_flags |= MASK_NO_FANCY_MATH_387;
1863 /* Turn on SSE2 builtins for -msse3. */
1865 target_flags |= MASK_SSE2;
1867 /* Turn on SSE builtins for -msse2. */
1869 target_flags |= MASK_SSE;
1871 /* Turn on MMX builtins for -msse. */
1874 target_flags |= MASK_MMX & ~target_flags_explicit;
1875 x86_prefetch_sse = true;
1878 /* Turn on MMX builtins for 3Dnow. */
1880 target_flags |= MASK_MMX;
1884 if (TARGET_ALIGN_DOUBLE)
1885 error ("-malign-double makes no sense in the 64bit mode");
1887 error ("-mrtd calling convention not supported in the 64bit mode");
1889 /* Enable by default the SSE and MMX builtins. Do allow the user to
1890 explicitly disable any of these. In particular, disabling SSE and
1891 MMX for kernel code is extremely useful. */
1893 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1894 & ~target_flags_explicit);
1898 /* i386 ABI does not specify red zone. It still makes sense to use it
1899 when programmer takes care to stack from being destroyed. */
1900 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1901 target_flags |= MASK_NO_RED_ZONE;
1904 /* Validate -mpreferred-stack-boundary= value, or provide default.
1905 The default of 128 bits is for Pentium III's SSE __m128. We can't
1906 change it because of optimize_size. Otherwise, we can't mix object
1907 files compiled with -Os and -On. */
1908 ix86_preferred_stack_boundary = 128;
1909 if (ix86_preferred_stack_boundary_string)
1911 i = atoi (ix86_preferred_stack_boundary_string);
1912 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1913 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1914 TARGET_64BIT ? 4 : 2);
1916 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1919 /* Accept -msseregparm only if at least SSE support is enabled. */
1920 if (TARGET_SSEREGPARM
1922 error ("-msseregparm used without SSE enabled");
1924 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1926 if (ix86_fpmath_string != 0)
1928 if (! strcmp (ix86_fpmath_string, "387"))
1929 ix86_fpmath = FPMATH_387;
1930 else if (! strcmp (ix86_fpmath_string, "sse"))
1934 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1935 ix86_fpmath = FPMATH_387;
1938 ix86_fpmath = FPMATH_SSE;
1940 else if (! strcmp (ix86_fpmath_string, "387,sse")
1941 || ! strcmp (ix86_fpmath_string, "sse,387"))
1945 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1946 ix86_fpmath = FPMATH_387;
1948 else if (!TARGET_80387)
1950 warning (0, "387 instruction set disabled, using SSE arithmetics");
1951 ix86_fpmath = FPMATH_SSE;
1954 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1957 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1960 /* If the i387 is disabled, then do not return values in it. */
1962 target_flags &= ~MASK_FLOAT_RETURNS;
1964 if ((x86_accumulate_outgoing_args & TUNEMASK)
1965 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1967 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1969 /* ??? Unwind info is not correct around the CFG unless either a frame
1970 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1971 unwind info generation to be aware of the CFG and propagating states
1973 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1974 || flag_exceptions || flag_non_call_exceptions)
1975 && flag_omit_frame_pointer
1976 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1978 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1979 warning (0, "unwind tables currently require either a frame pointer "
1980 "or -maccumulate-outgoing-args for correctness");
1981 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1984 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1987 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1988 p = strchr (internal_label_prefix, 'X');
1989 internal_label_prefix_len = p - internal_label_prefix;
1993 /* When scheduling description is not available, disable scheduler pass
1994 so it won't slow down the compilation and make x87 code slower. */
1995 if (!TARGET_SCHEDULE)
1996 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
/* NOTE(review): this extraction is missing interior lines (the embedded
   original line numbers jump, e.g. 2013 -> 2018); the code below is kept
   byte-identical to what survived.  Missing lines likely include braces,
   `break;` statements and the remaining switch cases -- verify against a
   pristine copy of i386.c before compiling.  */
1999 /* switch to the appropriate section for output of DECL.
2000 DECL is either a `VAR_DECL' node or a constant of some sort.
2001 RELOC indicates whether forming the initial value of DECL requires
2002 link-time relocations. */
2005 x86_64_elf_select_section (tree decl, int reloc,
2006 unsigned HOST_WIDE_INT align)
/* Medium code model: objects classified as "large data" are routed into
   the .ldata family of sections instead of the default .data/.bss ones.  */
2008 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2009 && ix86_in_large_data_p (decl))
2011 const char *sname = NULL;
2012 unsigned int flags = SECTION_WRITE;
/* Map the decl's section category to the matching large-data section name,
   mirroring the standard ELF section taxonomy with an ".l" prefix.  */
2013 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2018 case SECCAT_DATA_REL:
2019 sname = ".ldata.rel";
2021 case SECCAT_DATA_REL_LOCAL:
2022 sname = ".ldata.rel.local";
2024 case SECCAT_DATA_REL_RO:
2025 sname = ".ldata.rel.ro";
2027 case SECCAT_DATA_REL_RO_LOCAL:
2028 sname = ".ldata.rel.ro.local";
2032 flags |= SECTION_BSS;
2035 case SECCAT_RODATA_MERGE_STR:
2036 case SECCAT_RODATA_MERGE_STR_INIT:
2037 case SECCAT_RODATA_MERGE_CONST:
2041 case SECCAT_SRODATA:
2048 /* We don't split these for medium model. Place them into
2049 default sections and hope for best. */
2054 /* We might get called with string constants, but get_named_section
2055 doesn't like them as they are not DECLs. Also, we need to set
2056 flags in that case. */
2058 return get_section (sname, flags, NULL);
2059 return get_named_section (decl, sname, reloc);
/* Not large data (or not medium model): fall back to the generic ELF hook.  */
2062 return default_elf_select_section (decl, reloc, align);
/* NOTE(review): interior lines are missing from this extraction (embedded
   line numbers jump); code kept byte-identical.  The declarations of
   `name', `string', `plen' and `nlen', several `break;'s and closing
   braces appear to be among the dropped lines -- confirm against a
   pristine i386.c.  */
2065 /* Build up a unique section name, expressed as a
2066 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2067 RELOC indicates whether the initial value of EXP requires
2068 link-time relocations. */
2071 x86_64_elf_unique_section (tree decl, int reloc)
/* Only large data in the medium code model gets the ".l" section prefix;
   everything else uses the generic uniquing logic at the bottom.  */
2073 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2074 && ix86_in_large_data_p (decl))
2076 const char *prefix = NULL;
2077 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2078 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Choose the section-name prefix from the decl's category; one-only
   decls use the .gnu.linkonce.* namespace instead.  */
2080 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2083 case SECCAT_DATA_REL:
2084 case SECCAT_DATA_REL_LOCAL:
2085 case SECCAT_DATA_REL_RO:
2086 case SECCAT_DATA_REL_RO_LOCAL:
2087 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2090 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2093 case SECCAT_RODATA_MERGE_STR:
2094 case SECCAT_RODATA_MERGE_STR_INIT:
2095 case SECCAT_RODATA_MERGE_CONST:
2096 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2098 case SECCAT_SRODATA:
2105 /* We don't split these for medium model. Place them into
2106 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer and
   install it as the decl's section name.  */
2114 plen = strlen (prefix);
2116 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2117 name = targetm.strip_name_encoding (name);
2118 nlen = strlen (name);
2120 string = alloca (nlen + plen + 1);
2121 memcpy (string, prefix, plen);
2122 memcpy (string + plen, name, nlen + 1);
2124 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2128 default_unique_section (decl, reloc);
2131 #ifdef COMMON_ASM_OP
2132 /* This says how to output assembler code to declare an
2133 uninitialized external linkage data object.
2135 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): lines are missing here (embedded numbering jumps 2135->2138);
   the return type and part of this comment were dropped.  Code below is
   kept byte-identical.  */
2138 x86_elf_aligned_common (FILE *file,
2139 const char *name, unsigned HOST_WIDE_INT size,
/* Large objects under the medium code model are emitted with .largecomm;
   everything else uses the standard COMMON_ASM_OP directive.  */
2142 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2143 && size > (unsigned int)ix86_section_threshold)
2144 fprintf (file, ".largecomm\t");
2146 fprintf (file, "%s", COMMON_ASM_OP);
2147 assemble_name (file, name);
/* Emit ",<size>,<alignment-in-bytes>" after the symbol name.  */
2148 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2149 size, align / BITS_PER_UNIT);
2152 /* Utility function for targets to use in implementing
2153 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): the return type line is missing from this extraction
   (numbering jumps 2153->2156); code kept byte-identical.  */
2156 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2157 const char *name, unsigned HOST_WIDE_INT size,
/* Large BSS objects under the medium code model go into .lbss; otherwise
   use the regular bss section.  */
2160 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2161 && size > (unsigned int)ix86_section_threshold)
2162 switch_to_section (get_named_section (decl, ".lbss", 0));
2164 switch_to_section (bss_section);
2165 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2166 #ifdef ASM_DECLARE_OBJECT_NAME
2167 last_assemble_variable_decl = decl;
2168 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2170 /* Standard thing is just output label for the object. */
2171 ASM_OUTPUT_LABEL (file, name);
2172 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to real storage.  */
2173 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Tweak optimization flags once per compilation, before target options
   are finalized (see override_options).  NOTE(review): interior lines
   are missing from this extraction (the `if (level > 1)' style guards
   and #endif lines were dropped); code kept byte-identical.  */
2178 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2180 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2181 make the problem with not enough registers even worse. */
2182 #ifdef INSN_SCHEDULING
2184 flag_schedule_insns = 0;
2188 /* The Darwin libraries never set errno, so we might as well
2189 avoid calling them when that's the only reason we would. */
2190 flag_errno_math = 0;
2192 /* The default values of these switches depend on the TARGET_64BIT
2193 that is not known at this moment. Mark these values with 2 and
2194 let user the to override these. In case there is no command line option
2195 specifying them, we will set the defaults in override_options. */
2197 flag_omit_frame_pointer = 2;
2198 flag_pcc_struct_return = 2;
2199 flag_asynchronous_unwind_tables = 2;
2200 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2201 SUBTARGET_OPTIMIZATION_OPTIONS;
2205 /* Table of valid machine attributes. */
/* NOTE(review): the closing "};" of this table and some #endif lines are
   missing from this extraction; entries below are kept byte-identical.
   The table is terminated by the all-NULL sentinel entry, per the
   attribute_spec convention.  */
2206 const struct attribute_spec ix86_attribute_table[] =
2208 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2209 /* Stdcall attribute says callee is responsible for popping arguments
2210 if they are not variable. */
2211 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2212 /* Fastcall attribute says callee is responsible for popping arguments
2213 if they are not variable. */
2214 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2215 /* Cdecl attribute says the callee is a normal C declaration */
2216 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2217 /* Regparm attribute specifies how many integer arguments are to be
2218 passed in registers. */
2219 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2220 /* Sseregparm attribute says we are using x86_64 calling conventions
2221 for FP arguments. */
2222 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2223 /* force_align_arg_pointer says this function realigns the stack at entry. */
2224 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2225 false, true, true, ix86_handle_cconv_attribute },
2226 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2227 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2228 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2229 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2231 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2232 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2233 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2234 SUBTARGET_ATTRIBUTE_TABLE,
2236 { NULL, 0, 0, false, false, false, NULL }
2239 /* Decide whether we can make a sibling call to a function. DECL is the
2240 declaration of the function being targeted by the call and EXP is the
2241 CALL_EXPR representing the call. */
/* NOTE(review): interior lines are missing (the `return false;' /
   `return true;' statements, local declarations of a, b, func, and
   several braces were dropped by this extraction); code kept
   byte-identical.  */
2244 ix86_function_ok_for_sibcall (tree decl, tree exp)
2249 /* If we are generating position-independent code, we cannot sibcall
2250 optimize any indirect call, or a direct call to a global function,
2251 as the PLT requires %ebx be live. */
2252 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Obtain the called function's type, looking through a pointer type.  */
2259 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2260 if (POINTER_TYPE_P (func))
2261 func = TREE_TYPE (func);
2264 /* Check that the return value locations are the same. Like
2265 if we are returning floats on the 80387 register stack, we cannot
2266 make a sibcall from a function that doesn't return a float to a
2267 function that does or, conversely, from a function that does return
2268 a float to a function that doesn't; the necessary stack adjustment
2269 would not be executed. This is also the place we notice
2270 differences in the return value ABI. Note that it is ok for one
2271 of the functions to have void return type as long as the return
2272 value of the other is passed in a register. */
2273 a = ix86_function_value (TREE_TYPE (exp), func, false);
2274 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2276 if (STACK_REG_P (a) || STACK_REG_P (b))
2278 if (!rtx_equal_p (a, b))
2281 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2283 else if (!rtx_equal_p (a, b))
2286 /* If this call is indirect, we'll need to be able to use a call-clobbered
2287 register for the address of the target function. Make sure that all
2288 such registers are not used for passing parameters. */
2289 if (!decl && !TARGET_64BIT)
2293 /* We're looking at the CALL_EXPR, we need the type of the function. */
2294 type = TREE_OPERAND (exp, 0); /* pointer expression */
2295 type = TREE_TYPE (type); /* pointer type */
2296 type = TREE_TYPE (type); /* function type */
/* With regparm(3) all call-clobbered integer registers carry arguments,
   leaving none free to hold the indirect call target.  */
2298 if (ix86_function_regparm (type, NULL) >= 3)
2300 /* ??? Need to count the actual number of registers to be used,
2301 not the possible number of registers. Fix later. */
2306 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2307 /* Dllimport'd functions are also called indirectly. */
2308 if (decl && DECL_DLLIMPORT_P (decl)
2309 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2313 /* If we forced aligned the stack, then sibcalling would unalign the
2314 stack, which may break the called function. */
2315 if (cfun->machine->force_align_arg_pointer)
2318 /* Otherwise okay. That also includes certain types of indirect calls. */
2322 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2323 calling convention attributes;
2324 arguments as in struct attribute_spec.handler. */
/* NOTE(review): interior lines are missing from this extraction (the
   `static tree' return type, `tree *args' parameter, `return NULL_TREE;'
   statements, the TARGET_64BIT branch, and many closing braces were
   dropped); code kept byte-identical.  */
2327 ix86_handle_cconv_attribute (tree *node, tree name,
2329 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or decls thereof);
   warn and drop the attribute otherwise.  */
2332 if (TREE_CODE (*node) != FUNCTION_TYPE
2333 && TREE_CODE (*node) != METHOD_TYPE
2334 && TREE_CODE (*node) != FIELD_DECL
2335 && TREE_CODE (*node) != TYPE_DECL)
2337 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2338 IDENTIFIER_POINTER (name));
2339 *no_add_attrs = true;
2343 /* Can combine regparm with all attributes but fastcall. */
2344 if (is_attribute_p ("regparm", name))
2348 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2350 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: must be an integer constant within
   [0, REGPARM_MAX].  */
2353 cst = TREE_VALUE (args);
2354 if (TREE_CODE (cst) != INTEGER_CST)
2356 warning (OPT_Wattributes,
2357 "%qs attribute requires an integer constant argument",
2358 IDENTIFIER_POINTER (name));
2359 *no_add_attrs = true;
2361 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2363 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2364 IDENTIFIER_POINTER (name), REGPARM_MAX);
2365 *no_add_attrs = true;
/* A stack-realigning function needs a scratch register (%ecx), so it
   cannot use the full regparm register set.  */
2369 && lookup_attribute (ix86_force_align_arg_pointer_string,
2370 TYPE_ATTRIBUTES (*node))
2371 && compare_tree_int (cst, REGPARM_MAX-1))
2373 error ("%s functions limited to %d register parameters",
2374 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2382 warning (OPT_Wattributes, "%qs attribute ignored",
2383 IDENTIFIER_POINTER (name));
2384 *no_add_attrs = true;
2388 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2389 if (is_attribute_p ("fastcall", name))
2391 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2393 error ("fastcall and cdecl attributes are not compatible");
2395 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2397 error ("fastcall and stdcall attributes are not compatible");
2399 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2401 error ("fastcall and regparm attributes are not compatible");
2405 /* Can combine stdcall with fastcall (redundant), regparm and
2407 else if (is_attribute_p ("stdcall", name))
2409 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2411 error ("stdcall and cdecl attributes are not compatible");
2413 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2415 error ("stdcall and fastcall attributes are not compatible");
2419 /* Can combine cdecl with regparm and sseregparm. */
2420 else if (is_attribute_p ("cdecl", name))
2422 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2424 error ("stdcall and cdecl attributes are not compatible");
2426 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2428 error ("fastcall and cdecl attributes are not compatible");
2432 /* Can combine sseregparm with all attributes. */
2437 /* Return 0 if the attributes for two types are incompatible, 1 if they
2438 are compatible, and 2 if they are nearly compatible (which causes a
2439 warning to be generated). */
/* NOTE(review): the `static int' return type line and the `return 0;' /
   `return 1;' statements are missing from this extraction; code kept
   byte-identical.  */
2442 ix86_comp_type_attributes (tree type1, tree type2)
2444 /* Check for mismatch of non-default calling convention. */
2445 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2447 if (TREE_CODE (type1) != FUNCTION_TYPE)
2450 /* Check for mismatched fastcall/regparm types. */
2451 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2452 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2453 || (ix86_function_regparm (type1, NULL)
2454 != ix86_function_regparm (type2, NULL)))
2457 /* Check for mismatched sseregparm types. */
2458 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2459 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2462 /* Check for mismatched return types (cdecl vs stdcall). */
2463 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2464 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2470 /* Return the regparm value for a function with the indicated TYPE and DECL.
2471 DECL may be NULL when calling function indirectly
2472 or considering a libcall. */
/* NOTE(review): interior lines are missing from this extraction (the
   `static int' return type, the TARGET_64BIT early return, the `attr'
   declaration, `break;'s and the final `return regparm;' were dropped);
   code kept byte-identical.  */
2475 ix86_function_regparm (tree type, tree decl)
2478 int regparm = ix86_regparm;
2479 bool user_convention = false;
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
2483 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2486 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2487 user_convention = true;
/* fastcall also counts as a user-specified convention.  */
2490 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2493 user_convention = true;
2496 /* Use register calling convention for local functions when possible. */
2497 if (!TARGET_64BIT && !user_convention && decl
2498 && flag_unit_at_a_time && !profile_flag)
2500 struct cgraph_local_info *i = cgraph_local_info (decl);
2503 int local_regparm, globals = 0, regno;
2505 /* Make sure no regparm register is taken by a global register
/* Scan eax/edx/ecx (the three regparm registers) for global register
   variables; stop at the first one taken.  */
2507 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2508 if (global_regs[local_regparm])
2510 /* We can't use regparm(3) for nested functions as these use
2511 static chain pointer in third argument. */
2512 if (local_regparm == 3
2513 && decl_function_context (decl)
2514 && !DECL_NO_STATIC_CHAIN (decl))
2516 /* If the function realigns its stackpointer, the
2517 prologue will clobber %ecx. If we've already
2518 generated code for the callee, the callee
2519 DECL_STRUCT_FUNCTION is gone, so we fall back to
2520 scanning the attributes for the self-realigning
2522 if ((DECL_STRUCT_FUNCTION (decl)
2523 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2524 || (!DECL_STRUCT_FUNCTION (decl)
2525 && lookup_attribute (ix86_force_align_arg_pointer_string,
2526 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2528 /* Each global register variable increases register preassure,
2529 so the more global reg vars there are, the smaller regparm
2530 optimization use, unless requested by the user explicitly. */
2531 for (regno = 0; regno < 6; regno++)
2532 if (global_regs[regno])
2535 = globals < local_regparm ? local_regparm - globals : 0;
2537 if (local_regparm > regparm)
2538 regparm = local_regparm;
2545 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2546 in SSE registers for a function with the indicated TYPE and DECL.
2547 DECL may be NULL when calling function indirectly
2548 or considering a libcall. Otherwise return 0. */
/* NOTE(review): interior lines are missing from this extraction (the
   `static int' return type, the TARGET_SSE guard, and return statements
   were dropped); code kept byte-identical.  */
2551 ix86_function_sseregparm (tree type, tree decl)
2553 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2554 by the sseregparm attribute. */
2555 if (TARGET_SSEREGPARM
2557 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE enabled is a hard error; report against the
   decl when we have one, else against the type.  */
2562 error ("Calling %qD with attribute sseregparm without "
2563 "SSE/SSE2 enabled", decl);
2565 error ("Calling %qT with attribute sseregparm without "
2566 "SSE/SSE2 enabled", type);
2573 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2574 in SSE registers even for 32-bit mode and not just 3, but up to
2575 8 SSE arguments in registers. */
2576 if (!TARGET_64BIT && decl
2577 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2579 struct cgraph_local_info *i = cgraph_local_info (decl);
2581 return TARGET_SSE2 ? 2 : 1;
2587 /* Return true if EAX is live at the start of the function. Used by
2588 ix86_expand_prologue to determine if we need special help before
2589 calling allocate_stack_worker. */
/* NOTE(review): the `static bool' return type line is missing from this
   extraction; code kept byte-identical.  */
2592 ix86_eax_live_at_start_p (void)
2594 /* Cheat. Don't bother working forward from ix86_function_regparm
2595 to the function type to whether an actual argument is located in
2596 eax. Instead just look at cfg info, which is still close enough
2597 to correct at this point. This gives false positives for broken
2598 functions that might use uninitialized data that happens to be
2599 allocated in eax, but who cares? */
/* Register 0 is %eax in the i386 register numbering.  */
2600 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2603 /* Value is the number of bytes of arguments automatically
2604 popped when returning from a subroutine call.
2605 FUNDECL is the declaration node of the function (as a tree),
2606 FUNTYPE is the data type of the function (as a tree),
2607 or for a library call it is an identifier node for the subroutine name.
2608 SIZE is the number of bytes of arguments passed on the stack.
2610 On the 80386, the RTD insn may be used to pop them if the number
2611 of args is fixed, but if the number is variable then the caller
2612 must pop them all. RTD can't be used for library calls now
2613 because the library is compiled with the Unix compiler.
2614 Use of RTD is a selectable option, since it is incompatible with
2615 standard Unix calling sequences. If the option is not selected,
2616 the caller must always pop the args.
2618 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): interior lines are missing from this extraction (the
   `return size;' / `return 0;' statements and several condition lines
   were dropped); code kept byte-identical.  */
2621 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* -mrtd applies only to real function types, not library-call
   identifier nodes.  */
2623 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2625 /* Cdecl functions override -mrtd, and never pop the stack. */
2626 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2628 /* Stdcall and fastcall functions will pop the stack if not
2630 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2631 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* A fixed-argument function (prototype ends in void) may pop its own
   arguments under -mrtd.  */
2635 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2636 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2637 == void_type_node)))
2641 /* Lose any fake structure return argument if it is passed on the stack. */
2642 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2644 && !KEEP_AGGREGATE_RETURN_POINTER)
2646 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden struct-return pointer occupies one word on the stack.  */
2649 return GET_MODE_SIZE (Pmode);
2655 /* Argument support functions. */
2657 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): interior lines are missing from this extraction (the
   return type, the TARGET_64BIT branch structure, `return true/false;'
   statements and the `int i;' declaration were dropped); code kept
   byte-identical.  */
2659 ix86_function_arg_regno_p (int regno)
/* 32-bit case: integer regparm registers, plus MMX/SSE argument
   registers when those ISAs are enabled.  */
2663 return (regno < REGPARM_MAX
2664 || (TARGET_MMX && MMX_REGNO_P (regno)
2665 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2666 || (TARGET_SSE && SSE_REGNO_P (regno)
2667 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
/* 64-bit case: SSE argument registers...  */
2669 if (TARGET_SSE && SSE_REGNO_P (regno)
2670 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2672 /* RAX is used as hidden argument to va_arg functions. */
/* ...and the x86-64 integer parameter registers.  */
2675 for (i = 0; i < REGPARM_MAX; i++)
2676 if (regno == x86_64_int_parameter_registers[i])
2681 /* Return if we do not know how to pass TYPE solely in registers. */
/* NOTE(review): the return type line and a `return true;' appear to be
   missing from this extraction; code kept byte-identical.  */
2684 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
/* Defer to the generic size/padding check first.  */
2686 if (must_pass_in_stack_var_size_or_pad (mode, type))
2689 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2690 The layout_type routine is crafty and tries to trick us into passing
2691 currently unsupported vector types on the stack by using TImode. */
2692 return (!TARGET_64BIT && mode == TImode
2693 && type && TREE_CODE (type) != VECTOR_TYPE);
2696 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2697 for a call to a function whose data type is FNTYPE.
2698 For a library call, FNTYPE is 0. */
/* NOTE(review): interior lines are missing from this extraction (the
   `tree fndecl' parameter, `*cum = zero_cum;', conditional guards around
   the SSE/MMX register counts, and several braces were dropped); code
   kept byte-identical.  */
2701 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2702 tree fntype, /* tree ptr for function decl */
2703 rtx libname, /* SYMBOL_REF of library name or 0 */
2706 static CUMULATIVE_ARGS zero_cum;
2707 tree param, next_param;
/* Optional tracing of argument-passing decisions under -mdebug-arg.  */
2709 if (TARGET_DEBUG_ARG)
2711 fprintf (stderr, "\ninit_cumulative_args (");
2713 fprintf (stderr, "fntype code = %s, ret code = %s",
2714 tree_code_name[(int) TREE_CODE (fntype)],
2715 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2717 fprintf (stderr, "no fntype");
2720 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2725 /* Set up the number of registers to use for passing arguments. */
2726 cum->nregs = ix86_regparm;
2728 cum->sse_nregs = SSE_REGPARM_MAX;
2730 cum->mmx_nregs = MMX_REGPARM_MAX;
2731 cum->warn_sse = true;
2732 cum->warn_mmx = true;
2733 cum->maybe_vaarg = false;
2735 /* Use ecx and edx registers if function has fastcall attribute,
2736 else look for regparm information. */
2737 if (fntype && !TARGET_64BIT)
2739 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2745 cum->nregs = ix86_function_regparm (fntype, fndecl);
2748 /* Set up the number of SSE registers used for passing SFmode
2749 and DFmode arguments. Warn for mismatching ABI. */
2750 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2752 /* Determine if this function has variable arguments. This is
2753 indicated by the last argument being 'void_type_mode' if there
2754 are no variable arguments. If there are variable arguments, then
2755 we won't pass anything in registers in 32-bit mode. */
2757 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
/* Walk the prototype's argument list; a list not terminated by
   void_type_node means the function is variadic.  */
2759 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2760 param != 0; param = next_param)
2762 next_param = TREE_CHAIN (param);
2763 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2773 cum->float_in_sse = 0;
2775 cum->maybe_vaarg = true;
/* No prototype at all (or an unprototyped fntype) must also be treated
   as possibly variadic.  */
2779 if ((!fntype && !libname)
2780 || (fntype && !TYPE_ARG_TYPES (fntype)))
2781 cum->maybe_vaarg = true;
2783 if (TARGET_DEBUG_ARG)
2784 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2789 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2790 But in the case of vector types, it is some vector mode.
2792 When we have only some of our vector isa extensions enabled, then there
2793 are some modes for which vector_mode_supported_p is false. For these
2794 modes, the generic vector support in gcc will choose some non-vector mode
2795 in order to implement the type. By computing the natural mode, we'll
2796 select the proper ABI location for the operand and not depend on whatever
2797 the middle-end decides to do with these vector types. */
2799 static enum machine_mode
2800 type_natural_mode (tree type)
2802 enum machine_mode mode = TYPE_MODE (type);
/* Only 8- and 16-byte vector types whose chosen TYPE_MODE is not already
   a vector mode need remapping; everything else keeps TYPE_MODE.  */
2804 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2806 HOST_WIDE_INT size = int_size_in_bytes (type);
2807 if ((size == 8 || size == 16)
2808 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2809 && TYPE_VECTOR_SUBPARTS (type) > 1)
2811 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Scan the vector mode class (float or int) starting from its
   narrowest member for a mode matching both element mode and count.  */
2813 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2814 mode = MIN_MODE_VECTOR_FLOAT;
2816 mode = MIN_MODE_VECTOR_INT;
2818 /* Get the mode which has this inner mode and number of units. */
2819 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2820 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2821 && GET_MODE_INNER (mode) == innermode)
2831 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2832 this may not agree with the mode that the type system has chosen for the
2833 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2834 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2837 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
/* If the type system already picked a usable (non-BLK) mode, a plain REG
   suffices; otherwise wrap the natural-mode REG in a one-entry PARALLEL
   at offset 0 so the middle end can handle the BLKmode value.  */
2842 if (orig_mode != BLKmode)
2843 tmp = gen_rtx_REG (orig_mode, regno);
2846 tmp = gen_rtx_REG (mode, regno);
2847 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2848 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2854 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2855 of this code is to classify each 8bytes of incoming argument by the register
2856 class and assign registers accordingly. */
2858 /* Return the union class of CLASS1 and CLASS2.
2859 See the x86-64 PS ABI for details. */
2861 static enum x86_64_reg_class
2862 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2864 /* Rule #1: If both classes are equal, this is the resulting class. */
2865 if (class1 == class2)
2868 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2870 if (class1 == X86_64_NO_CLASS)
2872 if (class2 == X86_64_NO_CLASS)
2875 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2876 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2877 return X86_64_MEMORY_CLASS;
2879 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* Keep the SImode flavor only when merging INTEGERSI with SSESF (both
   describe a 4-byte slot); any other INTEGER combination widens to the
   full 8-byte INTEGER class.  */
2880 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2881 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2882 return X86_64_INTEGERSI_CLASS;
2883 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2884 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2885 return X86_64_INTEGER_CLASS;
2887 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2889 if (class1 == X86_64_X87_CLASS
2890 || class1 == X86_64_X87UP_CLASS
2891 || class1 == X86_64_COMPLEX_X87_CLASS
2892 || class2 == X86_64_X87_CLASS
2893 || class2 == X86_64_X87UP_CLASS
2894 || class2 == X86_64_COMPLEX_X87_CLASS)
2895 return X86_64_MEMORY_CLASS;
2897 /* Rule #6: Otherwise class SSE is used. */
2898 return X86_64_SSE_CLASS;
2901 /* Classify the argument of type TYPE and mode MODE.
2902 CLASSES will be filled by the register class used to pass each word
2903 of the operand. The number of words is returned. In case the parameter
2904 should be passed in memory, 0 is returned. As a special case for zero
2905 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2907 BIT_OFFSET is used internally for handling records and specifies offset
2908 of the offset in bits modulo 256 to avoid overflow cases.
2910 See the x86-64 PS ABI for details.
2914 classify_argument (enum machine_mode mode, tree type,
2915 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
/* Size in bytes: BLKmode values take their size from the tree type,
   everything else from the machine mode.  */
2917 HOST_WIDE_INT bytes =
2918 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 8-byte "eightbyte" words the value occupies, accounting for
   the sub-word start offset within the current eightbyte.  */
2919 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2921 /* Variable sized entities are always passed/returned in memory. */
2925 if (mode != VOIDmode
2926 && targetm.calls.must_pass_in_stack (mode, type))
2929 if (type && AGGREGATE_TYPE_P (type))
2933 enum x86_64_reg_class subclasses[MAX_CLASSES];
2935 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2939 for (i = 0; i < words; i++)
2940 classes[i] = X86_64_NO_CLASS;
2942 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2943 signalize memory class, so handle it as special case. */
2946 classes[0] = X86_64_NO_CLASS;
2950 /* Classify each field of record and merge classes. */
2951 switch (TREE_CODE (type))
2954 /* For classes first merge in the field of the subclasses. */
2955 if (TYPE_BINFO (type))
2957 tree binfo, base_binfo;
/* Recursively classify each C++ base class at its byte offset and
   merge its per-eightbyte classes into ours.  */
2960 for (binfo = TYPE_BINFO (type), basenum = 0;
2961 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2964 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2965 tree type = BINFO_TYPE (base_binfo);
2967 num = classify_argument (TYPE_MODE (type),
2969 (offset + bit_offset) % 256);
2972 for (i = 0; i < num; i++)
2974 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2976 merge_classes (subclasses[i], classes[i + pos]);
2980 /* And now merge the fields of structure. */
2981 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2983 if (TREE_CODE (field) == FIELD_DECL)
2987 if (TREE_TYPE (field) == error_mark_node)
2990 /* Bitfields are always classified as integer. Handle them
2991 early, since later code would consider them to be
2992 misaligned integers. */
2993 if (DECL_BIT_FIELD (field))
/* Mark every eightbyte the bit-field overlaps as INTEGER.  */
2995 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2996 i < ((int_bit_position (field) + (bit_offset % 64))
2997 + tree_low_cst (DECL_SIZE (field), 0)
3000 merge_classes (X86_64_INTEGER_CLASS,
3005 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3006 TREE_TYPE (field), subclasses,
3007 (int_bit_position (field)
3008 + bit_offset) % 256);
3011 for (i = 0; i < num; i++)
3014 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3016 merge_classes (subclasses[i], classes[i + pos]);
3024 /* Arrays are handled as small records. */
/* Classify one element, then replicate its classes across all the
   eightbytes of the array (subclasses[i % num] below).  */
3027 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3028 TREE_TYPE (type), subclasses, bit_offset);
3032 /* The partial classes are now full classes. */
3033 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3034 subclasses[0] = X86_64_SSE_CLASS;
3035 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3036 subclasses[0] = X86_64_INTEGER_CLASS;
3038 for (i = 0; i < words; i++)
3039 classes[i] = subclasses[i % num];
3044 case QUAL_UNION_TYPE:
3045 /* Unions are similar to RECORD_TYPE but offset is always 0.
3048 /* Unions are not derived. */
3049 gcc_assert (!TYPE_BINFO (type)
3050 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3051 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3053 if (TREE_CODE (field) == FIELD_DECL)
3057 if (TREE_TYPE (field) == error_mark_node)
3060 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3061 TREE_TYPE (field), subclasses,
3065 for (i = 0; i < num; i++)
3066 classes[i] = merge_classes (subclasses[i], classes[i]);
3075 /* Final merger cleanup. */
3076 for (i = 0; i < words; i++)
3078 /* If one class is MEMORY, everything should be passed in
3080 if (classes[i] == X86_64_MEMORY_CLASS)
3083 /* The X86_64_SSEUP_CLASS should be always preceded by
3084 X86_64_SSE_CLASS. */
3085 if (classes[i] == X86_64_SSEUP_CLASS
3086 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3087 classes[i] = X86_64_SSE_CLASS;
3089 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3090 if (classes[i] == X86_64_X87UP_CLASS
3091 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3092 classes[i] = X86_64_SSE_CLASS;
3097 /* Compute alignment needed. We align all types to natural boundaries with
3098 exception of XFmode that is aligned to 64bits. */
3099 if (mode != VOIDmode && mode != BLKmode)
3101 int mode_alignment = GET_MODE_BITSIZE (mode);
3104 mode_alignment = 128;
3105 else if (mode == XCmode)
3106 mode_alignment = 256;
3107 if (COMPLEX_MODE_P (mode))
3108 mode_alignment /= 2;
3109 /* Misaligned fields are always returned in memory. */
3110 if (bit_offset % mode_alignment)
3114 /* for V1xx modes, just use the base mode */
3115 if (VECTOR_MODE_P (mode)
3116 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3117 mode = GET_MODE_INNER (mode);
3119 /* Classification of atomic types. */
3124 classes[0] = X86_64_SSE_CLASS;
3127 classes[0] = X86_64_SSE_CLASS;
3128 classes[1] = X86_64_SSEUP_CLASS;
3137 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3138 classes[0] = X86_64_INTEGERSI_CLASS;
3140 classes[0] = X86_64_INTEGER_CLASS;
3144 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3149 if (!(bit_offset % 64))
3150 classes[0] = X86_64_SSESF_CLASS;
3152 classes[0] = X86_64_SSE_CLASS;
3155 classes[0] = X86_64_SSEDF_CLASS;
3158 classes[0] = X86_64_X87_CLASS;
3159 classes[1] = X86_64_X87UP_CLASS;
3162 classes[0] = X86_64_SSE_CLASS;
3163 classes[1] = X86_64_SSEUP_CLASS;
3166 classes[0] = X86_64_SSE_CLASS;
3169 classes[0] = X86_64_SSEDF_CLASS;
3170 classes[1] = X86_64_SSEDF_CLASS;
3173 classes[0] = X86_64_COMPLEX_X87_CLASS;
3176 /* This modes is larger than 16 bytes. */
3184 classes[0] = X86_64_SSE_CLASS;
3185 classes[1] = X86_64_SSEUP_CLASS;
3191 classes[0] = X86_64_SSE_CLASS;
3197 gcc_assert (VECTOR_MODE_P (mode));
3202 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3204 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3205 classes[0] = X86_64_INTEGERSI_CLASS;
3207 classes[0] = X86_64_INTEGER_CLASS;
3208 classes[1] = X86_64_INTEGER_CLASS;
3209 return 1 + (bytes > 8);
3213 /* Examine the argument and return set number of register required in each
3214 class. Return 0 iff parameter should be passed in memory. */
3216 examine_argument (enum machine_mode mode, tree type, int in_return,
3217 int *int_nregs, int *sse_nregs)
3219 enum x86_64_reg_class class[MAX_CLASSES];
3220 int n = classify_argument (mode, type, class, 0);
/* Walk the classes backwards, counting how many GPRs and SSE registers
   the argument needs; *int_nregs and *sse_nregs are presumably zeroed
   before this loop (initialization elided in this excerpt).  */
3226 for (n--; n >= 0; n--)
3229 case X86_64_INTEGER_CLASS:
3230 case X86_64_INTEGERSI_CLASS:
3233 case X86_64_SSE_CLASS:
3234 case X86_64_SSESF_CLASS:
3235 case X86_64_SSEDF_CLASS:
3238 case X86_64_NO_CLASS:
3239 case X86_64_SSEUP_CLASS:
3241 case X86_64_X87_CLASS:
3242 case X86_64_X87UP_CLASS:
/* x87 classes occupy no GPR/SSE registers: valid for return values
   (st0), but force memory when used as an argument.  */
3246 case X86_64_COMPLEX_X87_CLASS:
3247 return in_return ? 2 : 0;
3248 case X86_64_MEMORY_CLASS:
3254 /* Construct container for the argument used by GCC interface. See
3255 FUNCTION_ARG for the detailed description. */
3258 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3259 tree type, int in_return, int nintregs, int nsseregs,
3260 const int *intreg, int sse_regno)
3262 /* The following variables hold the static issued_error state. */
3263 static bool issued_sse_arg_error;
3264 static bool issued_sse_ret_error;
3265 static bool issued_x87_ret_error;
3267 enum machine_mode tmpmode;
3269 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3270 enum x86_64_reg_class class[MAX_CLASSES];
3274 int needed_sseregs, needed_intregs;
3275 rtx exp[MAX_CLASSES];
3278 n = classify_argument (mode, type, class, 0);
3279 if (TARGET_DEBUG_ARG)
3282 fprintf (stderr, "Memory class\n");
3285 fprintf (stderr, "Classes:");
3286 for (i = 0; i < n; i++)
3288 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3290 fprintf (stderr, "\n");
/* Fall back to memory when classification failed or the remaining
   register budget cannot hold the value.  */
3295 if (!examine_argument (mode, type, in_return, &needed_intregs,
3298 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3301 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3302 some less clueful developer tries to use floating-point anyway. */
3303 if (needed_sseregs && !TARGET_SSE)
3307 if (!issued_sse_ret_error)
3309 error ("SSE register return with SSE disabled");
3310 issued_sse_ret_error = true;
3313 else if (!issued_sse_arg_error)
3315 error ("SSE register argument with SSE disabled");
3316 issued_sse_arg_error = true;
3321 /* Likewise, error if the ABI requires us to return values in the
3322 x87 registers and the user specified -mno-80387. */
3323 if (!TARGET_80387 && in_return)
3324 for (i = 0; i < n; i++)
3325 if (class[i] == X86_64_X87_CLASS
3326 || class[i] == X86_64_X87UP_CLASS
3327 || class[i] == X86_64_COMPLEX_X87_CLASS)
3329 if (!issued_x87_ret_error)
3331 error ("x87 register return with x87 disabled")
3332 issued_x87_ret_error = true;
3337 /* First construct simple cases. Avoid SCmode, since we want to use
3338 single register to pass this type. */
3339 if (n == 1 && mode != SCmode)
3342 case X86_64_INTEGER_CLASS:
3343 case X86_64_INTEGERSI_CLASS:
3344 return gen_rtx_REG (mode, intreg[0]);
3345 case X86_64_SSE_CLASS:
3346 case X86_64_SSESF_CLASS:
3347 case X86_64_SSEDF_CLASS:
3348 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3349 case X86_64_X87_CLASS:
3350 case X86_64_COMPLEX_X87_CLASS:
3351 return gen_rtx_REG (mode, FIRST_STACK_REG);
3352 case X86_64_NO_CLASS:
3353 /* Zero sized array, struct or class. */
/* Two-eightbyte values that map onto a single wide register.  */
3358 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3360 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3362 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3363 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3364 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3365 && class[1] == X86_64_INTEGER_CLASS
3366 && (mode == CDImode || mode == TImode || mode == TFmode)
3367 && intreg[0] + 1 == intreg[1])
3368 return gen_rtx_REG (mode, intreg[0]);
3370 /* Otherwise figure out the entries of the PARALLEL. */
3371 for (i = 0; i < n; i++)
3375 case X86_64_NO_CLASS:
3377 case X86_64_INTEGER_CLASS:
3378 case X86_64_INTEGERSI_CLASS:
3379 /* Merge TImodes on aligned occasions here too. */
3380 if (i * 8 + 8 > bytes)
3381 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3382 else if (class[i] == X86_64_INTEGERSI_CLASS)
3386 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3387 if (tmpmode == BLKmode)
3389 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3390 gen_rtx_REG (tmpmode, *intreg),
3394 case X86_64_SSESF_CLASS:
3395 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3396 gen_rtx_REG (SFmode,
3397 SSE_REGNO (sse_regno)),
3401 case X86_64_SSEDF_CLASS:
3402 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3403 gen_rtx_REG (DFmode,
3404 SSE_REGNO (sse_regno)),
3408 case X86_64_SSE_CLASS:
3409 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3413 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3414 gen_rtx_REG (tmpmode,
3415 SSE_REGNO (sse_regno)),
3417 if (tmpmode == TImode)
3426 /* Empty aligned struct, union or class. */
3430 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3431 for (i = 0; i < nexps; i++)
3432 XVECEXP (ret, 0, i) = exp [i];
3436 /* Update the data in CUM to advance over an argument
3437 of mode MODE and data type TYPE.
3438 (TYPE is null for libcalls where that information may not be available.) */
3441 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3442 tree type, int named)
3445 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3446 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3449 mode = type_natural_mode (type);
3451 if (TARGET_DEBUG_ARG)
3452 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3453 "mode=%s, named=%d)\n\n",
3454 words, cum->words, cum->nregs, cum->sse_nregs,
3455 GET_MODE_NAME (mode), named);
/* 64-bit path (presumably guarded by TARGET_64BIT; the condition is
   elided in this excerpt): consume GPR/SSE budget per classification,
   else fall back to stack words.  */
3459 int int_nregs, sse_nregs;
3460 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3461 cum->words += words;
3462 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3464 cum->nregs -= int_nregs;
3465 cum->sse_nregs -= sse_nregs;
3466 cum->regno += int_nregs;
3467 cum->sse_regno += sse_nregs;
3470 cum->words += words;
/* 32-bit integer-register path: arguments consume whole words.  */
3488 cum->words += words;
3489 cum->nregs -= words;
3490 cum->regno += words;
3492 if (cum->nregs <= 0)
3500 if (cum->float_in_sse < 2)
3503 if (cum->float_in_sse < 1)
/* SSE vector argument (32-bit): scalar and vector aggregates differ.  */
3514 if (!type || !AGGREGATE_TYPE_P (type))
3516 cum->sse_words += words;
3517 cum->sse_nregs -= 1;
3518 cum->sse_regno += 1;
3519 if (cum->sse_nregs <= 0)
/* MMX vector argument (32-bit): analogous bookkeeping in MMX regs.  */
3531 if (!type || !AGGREGATE_TYPE_P (type))
3533 cum->mmx_words += words;
3534 cum->mmx_nregs -= 1;
3535 cum->mmx_regno += 1;
3536 if (cum->mmx_nregs <= 0)
3547 /* Define where to put the arguments to a function.
3548 Value is zero to push the argument on the stack,
3549 or a hard register in which to store the argument.
3551 MODE is the argument's machine mode.
3552 TYPE is the data type of the argument (as a tree).
3553 This is null for libcalls where that information may
3555 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3556 the preceding args and about the function being called.
3557 NAMED is nonzero if this argument is a named parameter
3558 (otherwise it is an extra parameter matching an ellipsis). */
3561 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3562 tree type, int named)
3564 enum machine_mode mode = orig_mode;
3567 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3568 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3569 static bool warnedsse, warnedmmx;
3571 /* To simplify the code below, represent vector types with a vector mode
3572 even if MMX/SSE are not active. */
3573 if (type && TREE_CODE (type) == VECTOR_TYPE)
3574 mode = type_natural_mode (type);
3576 /* Handle a hidden AL argument containing number of registers for varargs
3577 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3579 if (mode == VOIDmode)
3582 return GEN_INT (cum->maybe_vaarg
3583 ? (cum->sse_nregs < 0
/* 64-bit: delegate to the full eightbyte classifier.  */
3591 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3593 &x86_64_int_parameter_registers [cum->regno],
3598 /* For now, pass fp/complex values on the stack. */
/* 32-bit integer path: only when the whole argument fits in the
   remaining register budget.  */
3610 if (words <= cum->nregs)
3612 int regno = cum->regno;
3614 /* Fastcall allocates the first two DWORD (SImode) or
3615 smaller arguments to ECX and EDX. */
3618 if (mode == BLKmode || mode == DImode)
3621 /* ECX not EAX is the first allocated register. */
3625 ret = gen_rtx_REG (mode, regno);
3629 if (cum->float_in_sse < 2)
3632 if (cum->float_in_sse < 1)
3642 if (!type || !AGGREGATE_TYPE_P (type))
3644 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3647 warning (0, "SSE vector argument without SSE enabled "
3651 ret = gen_reg_or_parallel (mode, orig_mode,
3652 cum->sse_regno + FIRST_SSE_REG)
3659 if (!type || !AGGREGATE_TYPE_P (type))
3661 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3664 warning (0, "MMX vector argument without MMX enabled "
3668 ret = gen_reg_or_parallel (mode, orig_mode,
3669 cum->mmx_regno + FIRST_MMX_REG);
3674 if (TARGET_DEBUG_ARG)
3677 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3678 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3681 print_simple_rtl (stderr, ret);
3683 fprintf (stderr, ", stack");
3685 fprintf (stderr, " )\n");
3691 /* A C expression that indicates when an argument must be passed by
3692 reference. If nonzero for an argument, a copy of that argument is
3693 made in memory and a pointer to the argument is passed instead of
3694 the argument itself. The pointer is passed in whatever way is
3695 appropriate for passing a pointer to that type. */
3698 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3699 enum machine_mode mode ATTRIBUTE_UNUSED,
3700 tree type, bool named ATTRIBUTE_UNUSED)
/* Variable-sized types (int_size_in_bytes == -1) cannot be copied by
   value; pass their address instead.  */
3705 if (type && int_size_in_bytes (type) == -1)
3707 if (TARGET_DEBUG_ARG)
3708 fprintf (stderr, "function_arg_pass_by_reference\n");
3715 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3716 ABI. Only called if TARGET_SSE. */
3718 contains_128bit_aligned_vector_p (tree type)
3720 enum machine_mode mode = TYPE_MODE (type);
/* A bare SSE-mode type counts unless the user explicitly lowered its
   alignment below 128 bits with an attribute.  */
3721 if (SSE_REG_MODE_P (mode)
3722 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3724 if (TYPE_ALIGN (type) < 128)
3727 if (AGGREGATE_TYPE_P (type))
3729 /* Walk the aggregates recursively. */
3730 switch (TREE_CODE (type))
3734 case QUAL_UNION_TYPE:
3738 if (TYPE_BINFO (type))
3740 tree binfo, base_binfo;
/* Check C++ base classes first, then the record's own fields.  */
3743 for (binfo = TYPE_BINFO (type), i = 0;
3744 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3745 if (contains_128bit_aligned_vector_p
3746 (BINFO_TYPE (base_binfo)))
3749 /* And now merge the fields of structure. */
3750 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3752 if (TREE_CODE (field) == FIELD_DECL
3753 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3760 /* Just for use if some languages passes arrays by value. */
3761 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3772 /* Gives the alignment boundary, in bits, of an argument with the
3773 specified mode and type. */
3776 ix86_function_arg_boundary (enum machine_mode mode, tree type)
/* Start from the type's (or mode's) natural alignment, floored at the
   ABI minimum PARM_BOUNDARY.  */
3780 align = TYPE_ALIGN (type);
3782 align = GET_MODE_ALIGNMENT (mode);
3783 if (align < PARM_BOUNDARY)
3784 align = PARM_BOUNDARY;
3787 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3788 make an exception for SSE modes since these require 128bit
3791 The handling here differs from field_alignment. ICC aligns MMX
3792 arguments to 4 byte boundaries, while structure fields are aligned
3793 to 8 byte boundaries. */
3795 align = PARM_BOUNDARY;
3798 if (!SSE_REG_MODE_P (mode))
3799 align = PARM_BOUNDARY;
/* Aggregates keep extra alignment only if they actually contain a
   128-bit-aligned vector somewhere inside.  */
3803 if (!contains_128bit_aligned_vector_p (type))
3804 align = PARM_BOUNDARY;
3812 /* Return true if N is a possible register number of function value. */
3814 ix86_function_value_regno_p (int regno)
/* Valid return registers: eax (regno 0 — the elided first condition
   presumably checks it; confirm against the full source), st0 when x87
   returns are enabled, xmm0 with SSE, and mm0 with MMX.  */
3817 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3818 || (regno == FIRST_SSE_REG && TARGET_SSE))
3822 && (regno == FIRST_MMX_REG && TARGET_MMX))
3828 /* Define how to find the value returned by a function.
3829 VALTYPE is the data type of the value (as a tree).
3830 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3831 otherwise, FUNC is 0. */
3833 ix86_function_value (tree valtype, tree fntype_or_decl,
3834 bool outgoing ATTRIBUTE_UNUSED)
3836 enum machine_mode natmode = type_natural_mode (valtype);
/* 64-bit: classify the return value into the return-register set.  */
3840 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3841 1, REGPARM_MAX, SSE_REGPARM_MAX,
3842 x86_64_int_return_registers, 0);
3843 /* For zero sized structures, construct_container return NULL, but we
3844 need to keep rest of compiler happy by returning meaningful value. */
3846 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* 32-bit: FNTYPE_OR_DECL may be either a FUNCTION_DECL or a function
   type; normalize before asking ix86_value_regno.  */
3851 tree fn = NULL_TREE, fntype;
3853 && DECL_P (fntype_or_decl))
3854 fn = fntype_or_decl;
3855 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3856 return gen_rtx_REG (TYPE_MODE (valtype),
3857 ix86_value_regno (natmode, fn, fntype));
3861 /* Return true iff type is returned in memory. */
3863 ix86_return_in_memory (tree type)
3865 int needed_intregs, needed_sseregs, size;
3866 enum machine_mode mode = type_natural_mode (type);
/* 64-bit: in memory exactly when the classifier cannot assign
   registers for the return value.  */
3869 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3871 if (mode == BLKmode)
3874 size = int_size_in_bytes (type);
3876 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3879 if (VECTOR_MODE_P (mode) || mode == TImode)
3881 /* User-created vectors small enough to fit in EAX. */
3885 /* MMX/3dNow values are returned in MM0,
3886 except when it doesn't exits. */
3888 return (TARGET_MMX ? 0 : 1);
3890 /* SSE values are returned in XMM0, except when it doesn't exist. */
3892 return (TARGET_SSE ? 0 : 1);
3906 /* When returning SSE vector types, we have a choice of either
3907 (1) being abi incompatible with a -march switch, or
3908 (2) generating an error.
3909 Given no good solution, I think the safest thing is one warning.
3910 The user won't be able to use -Werror, but....
3912 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3913 called in response to actually generating a caller or callee that
3914 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3915 via aggregate_value_p for general type probing from tree-ssa. */
3918 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3920 static bool warnedsse, warnedmmx;
3924 /* Look at the return type of the function, not the function type. */
3925 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
/* One-shot ABI warnings: a 16-byte vector return without SSE, or an
   8-byte vector return without MMX, silently changes the ABI.  */
3927 if (!TARGET_SSE && !warnedsse)
3930 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3933 warning (0, "SSE vector return without SSE enabled "
3938 if (!TARGET_MMX && !warnedmmx)
3940 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3943 warning (0, "MMX vector return without MMX enabled "
3952 /* Define how to find the value returned by a library function
3953 assuming the value has mode MODE. */
3955 ix86_libcall_value (enum machine_mode mode)
/* Elided mode dispatch above these returns: SSE modes in xmm0, x87
   float modes in st0, everything else in eax; the 32-bit fallback asks
   ix86_value_regno with no function context.  */
3969 return gen_rtx_REG (mode, FIRST_SSE_REG);
3972 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3976 return gen_rtx_REG (mode, 0);
3980 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3983 /* Given a mode, return the register to use for a return value. */
3986 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3988 gcc_assert (!TARGET_64BIT);
3990 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3991 we normally prevent this case when mmx is not available. However
3992 some ABIs may require the result to be returned like DImode. */
3993 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3994 return TARGET_MMX ? FIRST_MMX_REG : 0;
3996 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3997 we prevent this case when sse is not available. However some ABIs
3998 may require the result to be returned like integer TImode. */
3999 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4000 return TARGET_SSE ? FIRST_SSE_REG : 0;
4002 /* Decimal floating point values can go in %eax, unlike other float modes. */
4003 if (DECIMAL_FLOAT_MODE_P (mode))
4006 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4007 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4010 /* Floating point return values in %st(0), except for local functions when
4011 SSE math is enabled or for functions with sseregparm attribute. */
4012 if ((func || fntype)
4013 && (mode == SFmode || mode == DFmode))
4015 int sse_level = ix86_function_sseregparm (fntype, func);
/* sseregparm level 1 covers SFmode only; level 2 covers DFmode too.  */
4016 if ((sse_level >= 1 && mode == SFmode)
4017 || (sse_level == 2 && mode == DFmode))
4018 return FIRST_SSE_REG;
4021 return FIRST_FLOAT_REG;
4024 /* Create the va_list data type. */
4027 ix86_build_builtin_va_list (void)
4029 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4031 /* For i386 we use plain pointer to argument area. */
4033 return build_pointer_type (char_type_node);
/* x86-64: build the four-field __va_list_tag record mandated by the
   psABI (gp_offset, fp_offset, overflow_arg_area, reg_save_area).  */
4035 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4036 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4038 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4039 unsigned_type_node);
4040 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4041 unsigned_type_node);
4042 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4044 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the va_list optimizer can track how
   much of the save area each function actually uses.  */
4047 va_list_gpr_counter_field = f_gpr;
4048 va_list_fpr_counter_field = f_fpr;
4050 DECL_FIELD_CONTEXT (f_gpr) = record;
4051 DECL_FIELD_CONTEXT (f_fpr) = record;
4052 DECL_FIELD_CONTEXT (f_ovf) = record;
4053 DECL_FIELD_CONTEXT (f_sav) = record;
4055 TREE_CHAIN (record) = type_decl;
4056 TYPE_NAME (record) = type_decl;
4057 TYPE_FIELDS (record) = f_gpr;
4058 TREE_CHAIN (f_gpr) = f_fpr;
4059 TREE_CHAIN (f_fpr) = f_ovf;
4060 TREE_CHAIN (f_ovf) = f_sav;
4062 layout_type (record);
4064 /* The correct type is an array type of one element. */
4065 return build_array_type (record, build_index_type (size_zero_node));
4068 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4071 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4072 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4075 CUMULATIVE_ARGS next_cum;
4076 rtx save_area = NULL_RTX, mem;
/* Nothing to save if this function never reads registers through
   va_arg.  */
4089 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4092 /* Indicate to allocate space on the stack for varargs save area. */
4093 ix86_save_varrargs_registers = 1;
4095 cfun->stack_alignment_needed = 128;
4097 fntype = TREE_TYPE (current_function_decl);
4098 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4099 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4100 != void_type_node));
4102 /* For varargs, we do not want to skip the dummy va_dcl argument.
4103 For stdargs, we do want to skip the last named argument. */
4106 function_arg_advance (&next_cum, mode, type, 1);
4109 save_area = frame_pointer_rtx;
4111 set = get_varargs_alias_set ();
/* Spill each still-unused integer parameter register into its slot of
   the register save area.  */
4113 for (i = next_cum.regno;
4115 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4118 mem = gen_rtx_MEM (Pmode,
4119 plus_constant (save_area, i * UNITS_PER_WORD));
4120 MEM_NOTRAP_P (mem) = 1;
4121 set_mem_alias_set (mem, set);
4122 emit_move_insn (mem, gen_rtx_REG (Pmode,
4123 x86_64_int_parameter_registers[i]));
4126 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4128 /* Now emit code to save SSE registers. The AX parameter contains number
4129 of SSE parameter registers used to call this function. We use
4130 sse_prologue_save insn template that produces computed jump across
4131 SSE saves. We need some preparation work to get this working. */
4133 label = gen_label_rtx ();
4134 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4136 /* Compute address to jump to :
4137 label - 5*eax + nnamed_sse_arguments*5 */
4138 tmp_reg = gen_reg_rtx (Pmode);
4139 nsse_reg = gen_reg_rtx (Pmode);
/* %al arrives zero-extended into a full register; each skipped save
   is a fixed-size instruction, hence the multiply by its length.  */
4140 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4141 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4142 gen_rtx_MULT (Pmode, nsse_reg,
4144 if (next_cum.sse_regno)
4147 gen_rtx_CONST (DImode,
4148 gen_rtx_PLUS (DImode,
4150 GEN_INT (next_cum.sse_regno * 4))));
4152 emit_move_insn (nsse_reg, label_ref);
4153 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4155 /* Compute address of memory block we save into. We always use pointer
4156 pointing 127 bytes after first byte to store - this is needed to keep
4157 instruction size limited by 4 bytes. */
4158 tmp_reg = gen_reg_rtx (Pmode);
4159 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4160 plus_constant (save_area,
4161 8 * REGPARM_MAX + 127)));
4162 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4163 MEM_NOTRAP_P (mem) = 1;
4164 set_mem_alias_set (mem, set);
4165 set_mem_align (mem, BITS_PER_WORD);
4167 /* And finally do the dirty job! */
4168 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4169 GEN_INT (next_cum.sse_regno), label))
4174 /* Implement va_start. */
/* VALIST is the va_list lvalue to initialize; NEXTARG is the rtx for the
   first anonymous argument, consumed only by the 32-bit std_ fallback.  */
4177 ix86_va_start (tree valist, rtx nextarg)
4179 HOST_WIDE_INT words, n_gpr, n_fpr;
4180 tree f_gpr, f_fpr, f_ovf, f_sav;
4181 tree gpr, fpr, ovf, sav, t;
4184 /* Only 64bit target needs something special. */
4187 std_expand_builtin_va_start (valist, nextarg);
/* Walk the four successive fields of the x86-64 va_list record type.  */
4191 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4192 f_fpr = TREE_CHAIN (f_gpr);
4193 f_ovf = TREE_CHAIN (f_fpr);
4194 f_sav = TREE_CHAIN (f_ovf);
/* Build COMPONENT_REFs for each field of the (dereferenced) va_list.  */
4196 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4197 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4198 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4199 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4200 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4202 /* Count number of gp and fp argument registers used. */
4203 words = current_function_args_info.words;
4204 n_gpr = current_function_args_info.regno;
4205 n_fpr = current_function_args_info.sse_regno;
4207 if (TARGET_DEBUG_ARG)
4208 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4209 (int) words, (int) n_gpr, (int) n_fpr);
/* Integer register offset: 8 bytes per already-consumed GP register.  */
4211 if (cfun->va_list_gpr_size)
4213 type = TREE_TYPE (gpr);
4214 t = build2 (MODIFY_EXPR, type, gpr,
4215 build_int_cst (type, n_gpr * 8));
4216 TREE_SIDE_EFFECTS (t) = 1;
4217 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* SSE register offset: 16 bytes per SSE register, located after the
   8 * REGPARM_MAX bytes reserved for the integer registers.  */
4220 if (cfun->va_list_fpr_size)
4222 type = TREE_TYPE (fpr);
4223 t = build2 (MODIFY_EXPR, type, fpr,
4224 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4225 TREE_SIDE_EFFECTS (t) = 1;
4226 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4229 /* Find the overflow area. */
4230 type = TREE_TYPE (ovf);
4231 t = make_tree (type, virtual_incoming_args_rtx)
4233 t = build2 (PLUS_EXPR, type, t,
4234 build_int_cst (type, words * UNITS_PER_WORD));
4235 t = build2 (MODIFY_EXPR, type, ovf, t);
4236 TREE_SIDE_EFFECTS (t) = 1;
4237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4239 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4241 /* Find the register save area.
4242 The function prologue saves it right above the stack frame. */
4243 type = TREE_TYPE (sav);
4244 t = make_tree (type, frame_pointer_rtx);
4245 t = build2 (MODIFY_EXPR, type, sav, t);
4246 TREE_SIDE_EFFECTS (t) = 1;
4247 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4251 /* Implement va_arg. */
/* Gimplify a VA_ARG_EXPR: fetch the next argument of type TYPE from
   VALIST, appending generated statements to *PRE_P / *POST_P.  Returns
   a tree for the fetched value.  */
4254 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4256 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4257 tree f_gpr, f_fpr, f_ovf, f_sav;
4258 tree gpr, fpr, ovf, sav, t;
4260 tree lab_false, lab_over = NULL_TREE;
4265 enum machine_mode nat_mode;
4267 /* Only 64bit target needs something special. */
4269 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Locate the four fields of the x86-64 va_list record type.  */
4271 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4272 f_fpr = TREE_CHAIN (f_gpr);
4273 f_ovf = TREE_CHAIN (f_fpr);
4274 f_sav = TREE_CHAIN (f_ovf);
4276 valist = build_va_arg_indirect_ref (valist);
4277 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4278 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4279 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4280 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer and
   dereferenced at the end.  */
4282 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4284 type = build_pointer_type (type);
4285 size = int_size_in_bytes (type);
4286 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Ask the ABI how this argument would be passed; CONTAINER describes
   the registers (if any) holding it.  */
4288 nat_mode = type_natural_mode (type);
4289 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4290 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4292 /* Pull the value out of the saved registers. */
4294 addr = create_tmp_var (ptr_type_node, "addr");
4295 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4299 int needed_intregs, needed_sseregs;
4301 tree int_addr, sse_addr;
4303 lab_false = create_artificial_label ();
4304 lab_over = create_artificial_label ();
4306 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when the value cannot be read directly from
   the save area, e.g. because of over-aligned types.  */
4308 need_temp = (!REG_P (container)
4309 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4310 || TYPE_ALIGN (type) > 128));
4312 /* In case we are passing structure, verify that it is consecutive block
4313 on the register save area. If not we need to do moves. */
4314 if (!need_temp && !REG_P (container))
4316 /* Verify that all registers are strictly consecutive */
4317 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE pieces: each slot must be 16 bytes apart in consecutive
   SSE registers.  */
4321 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4323 rtx slot = XVECEXP (container, 0, i);
4324 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4325 || INTVAL (XEXP (slot, 1)) != i * 16)
/* Integer pieces: each slot must be 8 bytes apart in consecutive
   integer registers.  */
4333 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4335 rtx slot = XVECEXP (container, 0, i);
4336 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4337 || INTVAL (XEXP (slot, 1)) != i * 8)
4349 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4350 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4351 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4352 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4355 /* First ensure that we fit completely in registers. */
/* if (gpr >= (REGPARM_MAX - needed_intregs + 1) * 8) goto lab_false;  */
4358 t = build_int_cst (TREE_TYPE (gpr),
4359 (REGPARM_MAX - needed_intregs + 1) * 8);
4360 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4361 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4362 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4363 gimplify_and_add (t, pre_p);
/* Same overflow check for the SSE register portion.  */
4367 t = build_int_cst (TREE_TYPE (fpr),
4368 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4370 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4371 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4372 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4373 gimplify_and_add (t, pre_p);
4376 /* Compute index to start of area used for integer regs. */
4379 /* int_addr = gpr + sav; */
4380 t = fold_convert (ptr_type_node, gpr);
4381 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4382 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4383 gimplify_and_add (t, pre_p);
4387 /* sse_addr = fpr + sav; */
4388 t = fold_convert (ptr_type_node, fpr);
4389 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4390 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4391 gimplify_and_add (t, pre_p);
/* Non-contiguous case: assemble the value piecewise into a stack
   temporary, then read it from there.  */
4396 tree temp = create_tmp_var (type, "va_arg_tmp");
4399 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4400 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4401 gimplify_and_add (t, pre_p);
4403 for (i = 0; i < XVECLEN (container, 0); i++)
4405 rtx slot = XVECEXP (container, 0, i);
4406 rtx reg = XEXP (slot, 0);
4407 enum machine_mode mode = GET_MODE (reg);
4408 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4409 tree addr_type = build_pointer_type (piece_type);
4412 tree dest_addr, dest;
/* Pick the source save area and slot size for this piece.  */
4414 if (SSE_REGNO_P (REGNO (reg)))
4416 src_addr = sse_addr;
4417 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4421 src_addr = int_addr;
4422 src_offset = REGNO (reg) * 8;
4424 src_addr = fold_convert (addr_type, src_addr);
4425 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4426 size_int (src_offset)));
4427 src = build_va_arg_indirect_ref (src_addr);
4429 dest_addr = fold_convert (addr_type, addr);
4430 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4431 size_int (INTVAL (XEXP (slot, 1)))));
4432 dest = build_va_arg_indirect_ref (dest_addr);
4434 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4435 gimplify_and_add (t, pre_p);
/* Bump the gp/fp offsets past the registers just consumed.  */
4441 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4442 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4443 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4444 gimplify_and_add (t, pre_p);
4448 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4449 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4450 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4451 gimplify_and_add (t, pre_p);
4454 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4455 gimplify_and_add (t, pre_p);
4457 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4458 append_to_statement_list (t, pre_p);
4461 /* ... otherwise out of the overflow area. */
4463 /* Care for on-stack alignment if needed. */
4464 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4465 || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's alignment:
   ovf = (ovf + align - 1) & -align.  */
4469 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4470 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4471 build_int_cst (TREE_TYPE (ovf), align - 1));
4472 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4473 build_int_cst (TREE_TYPE (t), -align));
4475 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4477 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4478 gimplify_and_add (t2, pre_p);
/* Advance the overflow pointer past this argument.  */
4480 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4481 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4482 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4483 gimplify_and_add (t, pre_p);
4487 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4488 append_to_statement_list (t, pre_p);
4491 ptrtype = build_pointer_type (type);
4492 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference arguments, ADDR points to a pointer to the
   real value, so dereference once more.  */
4495 addr = build_va_arg_indirect_ref (addr);
4496 return build_va_arg_indirect_ref (addr);
4499 /* Return nonzero if OPNUM's MEM should be matched
4500 in movabs* patterns. */
4503 ix86_check_movabs (rtx insn, int opnum)
/* For a PARALLEL, only the first SET is inspected.  */
4507 set = PATTERN (insn);
4508 if (GET_CODE (set) == PARALLEL)
4509 set = XVECEXP (set, 0, 0);
4510 gcc_assert (GET_CODE (set) == SET);
4511 mem = XEXP (set, opnum);
/* Strip SUBREGs to reach the underlying MEM operand.  */
4512 while (GET_CODE (mem) == SUBREG)
4513 mem = SUBREG_REG (mem);
4514 gcc_assert (GET_CODE (mem) == MEM);
/* Volatile memory is acceptable only when volatile_ok is set.  */
4515 return (volatile_ok || !MEM_VOLATILE_P (mem));
4518 /* Initialize the table of extra 80387 mathematical constants. */
/* Fills ext_80387_constants_table with the values loadable by the
   fldlg2/fldln2/fldl2e/fldl2t/fldpi instructions and sets the
   ext_80387_constants_init flag so this runs only once.  */
4521 init_ext_80387_constants (void)
4523 static const char * cst[5] =
4525 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4526 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4527 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4528 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4529 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4533 for (i = 0; i < 5; i++)
4535 real_from_string (&ext_80387_constants_table[i], cst[i]);
4536 /* Ensure each constant is rounded to XFmode precision. */
4537 real_convert (&ext_80387_constants_table[i],
4538 XFmode, &ext_80387_constants_table[i]);
4541 ext_80387_constants_init = 1;
4544 /* Return true if the constant is something that can be loaded with
4545 a special instruction. */
/* X must be a floating-point CONST_DOUBLE; 0.0 and 1.0 always qualify
   (fldz/fld1), the five table constants qualify only for XFmode.  */
4548 standard_80387_constant_p (rtx x)
4550 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4553 if (x == CONST0_RTX (GET_MODE (x)))
4555 if (x == CONST1_RTX (GET_MODE (x)))
4558 /* For XFmode constants, try to find a special 80387 instruction when
4559 optimizing for size or on those CPUs that benefit from them. */
4560 if (GET_MODE (x) == XFmode
4561 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
/* Lazily initialize the constant table before comparing.  */
4566 if (! ext_80387_constants_init)
4567 init_ext_80387_constants ();
4569 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4570 for (i = 0; i < 5; i++)
4571 if (real_identical (&r, &ext_80387_constants_table[i]))
4578 /* Return the opcode of the special instruction to be used to load
/* Dispatches on the classification returned by
   standard_80387_constant_p for the constant X.  */
4582 standard_80387_constant_opcode (rtx x)
4584 switch (standard_80387_constant_p (x))
4605 /* Return the CONST_DOUBLE representing the 80387 constant that is
4606 loaded by the specified special instruction. The argument IDX
4607 matches the return value from standard_80387_constant_p. */
4610 standard_80387_constant_rtx (int idx)
/* Make sure the table of extended constants has been built.  */
4614 if (! ext_80387_constants_init)
4615 init_ext_80387_constants ();
4631 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4635 /* Return 1 if mode is a valid mode for sse. */
4637 standard_sse_mode_p (enum machine_mode mode)
4654 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Returns 1 for all-zeros, 2 for all-ones when SSE2 is available
   (loadable via pcmpeqd), and -1 for all-ones without SSE2.  */
4657 standard_sse_constant_p (rtx x)
4659 enum machine_mode mode = GET_MODE (x);
4661 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4663 if (vector_all_ones_operand (x, mode)
4664 && standard_sse_mode_p (mode))
4665 return TARGET_SSE2 ? 2 : -1;
4670 /* Return the opcode of the special instruction to be used to load
/* INSN is the containing insn (queried for its mode attribute),
   X the constant classified by standard_sse_constant_p.  */
4674 standard_sse_constant_opcode (rtx insn, rtx x)
4676 switch (standard_sse_constant_p (x))
/* All-zeros: pick the xor flavor matching the insn's vector mode.  */
4679 if (get_attr_mode (insn) == MODE_V4SF)
4680 return "xorps\t%0, %0";
4681 else if (get_attr_mode (insn) == MODE_V2DF)
4682 return "xorpd\t%0, %0";
4684 return "pxor\t%0, %0";
/* All-ones: a register compared equal with itself.  */
4686 return "pcmpeqd\t%0, %0";
4691 /* Returns 1 if OP contains a symbol reference */
/* Recursively scans the rtx OP for any SYMBOL_REF or LABEL_REF,
   walking both 'e' (expression) and 'E' (vector) operands.  */
4694 symbolic_reference_mentioned_p (rtx op)
4699 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4702 fmt = GET_RTX_FORMAT (GET_CODE (op));
4703 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' operands are rtx vectors; recurse into each element.  */
4709 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4710 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4714 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4721 /* Return 1 if it is appropriate to emit `ret' instructions in the
4722 body of a function. Do this only if the epilogue is simple, needing a
4723 couple of insns. Prior to reloading, we can't tell how many registers
4724 must be saved, so return 0 then. Return 0 if there is no frame
4725 marker to de-allocate. */
4728 ix86_can_use_return_insn_p (void)
4730 struct ix86_frame frame;
4732 if (! reload_completed || frame_pointer_needed)
4735 /* Don't allow more than 32k pop, since that's all we can do
4736 with one instruction. */
4737 if (current_function_pops_args
4738 && current_function_args_size >= 32768)
/* A plain `ret' works only when nothing is left to deallocate.  */
4741 ix86_compute_frame_layout (&frame);
4742 return frame.to_allocate == 0 && frame.nregs == 0;
4745 /* Value should be nonzero if functions must have frame pointers.
4746 Zero means the frame pointer need not be set up (and parms may
4747 be accessed via the stack pointer) in functions that seem suitable. */
4750 ix86_frame_pointer_required (void)
4752 /* If we accessed previous frames, then the generated code expects
4753 to be able to access the saved ebp value in our frame. */
4754 if (cfun->machine->accesses_prev_frame)
4757 /* Several x86 os'es need a frame pointer for other reasons,
4758 usually pertaining to setjmp. */
4759 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4762 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4763 the frame pointer by default. Turn it back on now if we've not
4764 got a leaf function. */
4765 if (TARGET_OMIT_LEAF_FRAME_POINTER
4766 && (!current_function_is_leaf
4767 || ix86_current_function_calls_tls_descriptor))
/* Profiling also forces a frame pointer.  */
4770 if (current_function_profile)
4776 /* Record that the current function accesses previous call frames. */
/* Queried later by ix86_frame_pointer_required to force a frame
   pointer.  */
4779 ix86_setup_frame_addresses (void)
4781 cfun->machine->accesses_prev_frame = 1;
/* Nonzero when the assembler supports hidden visibility and one-only
   (link-once) sections, or on Mach-O, so PC thunks can be emitted as
   comdat functions instead of local labels.  */
4784 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4785 # define USE_HIDDEN_LINKONCE 1
4787 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a PC-thunk has been requested by
   output_set_got; ix86_file_end emits the thunk bodies.  */
4790 static int pic_labels_used;
4792 /* Fills in the label name that should be used for a pc thunk for
4793 the given register. */
/* NAME must hold at least 32 bytes.  Only meaningful for 32-bit code.  */
4796 get_pc_thunk_name (char name[32], unsigned int regno)
4798 gcc_assert (!TARGET_64BIT);
/* Link-once thunks get a well-known global name keyed on the register;
   otherwise an internal label is generated.  */
4800 if (USE_HIDDEN_LINKONCE)
4801 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4803 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4807 /* This function generates code for -fpic that loads %ebx with
4808 the return address of the caller and then returns. */
/* Runs at end of file: emits a PC thunk for every register recorded
   in pic_labels_used by output_set_got.  */
4811 ix86_file_end (void)
4816 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested.  */
4820 if (! ((pic_labels_used >> regno) & 1))
4823 get_pc_thunk_name (name, regno);
/* Darwin: emit into the coalesced text section as a weak,
   private-extern definition.  */
4828 switch_to_section (darwin_sections[text_coal_section]);
4829 fputs ("\t.weak_definition\t", asm_out_file);
4830 assemble_name (asm_out_file, name);
4831 fputs ("\n\t.private_extern\t", asm_out_file);
4832 assemble_name (asm_out_file, name);
4833 fputs ("\n", asm_out_file);
4834 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF with link-once support: build a dummy FUNCTION_DECL so the
   thunk goes into its own one-only section with hidden visibility.  */
4838 if (USE_HIDDEN_LINKONCE)
4842 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4844 TREE_PUBLIC (decl) = 1;
4845 TREE_STATIC (decl) = 1;
4846 DECL_ONE_ONLY (decl) = 1;
4848 (*targetm.asm_out.unique_section) (decl, 0);
4849 switch_to_section (get_named_section (decl, NULL, 0));
4851 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4852 fputs ("\t.hidden\t", asm_out_file);
4853 assemble_name (asm_out_file, name);
4854 fputc ('\n', asm_out_file);
4855 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: plain local label in the normal text section.  */
4859 switch_to_section (text_section);
4860 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack)
   into the target register, then return.  */
4863 xops[0] = gen_rtx_REG (SImode, regno);
4864 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4865 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4866 output_asm_insn ("ret", xops);
4869 if (NEED_INDICATE_EXEC_STACK)
4870 file_end_indicate_exec_stack ();
4873 /* Emit code for the SET_GOT patterns. */
/* Outputs assembly that leaves the GOT address in DEST.  LABEL, when
   non-NULL, names the PC-reference point used in the displacement.  */
4876 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4881 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or non-PIC), materialize the PC via
   an inline call/pop pair instead of a thunk.  */
4883 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4885 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ())
4888 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4890 output_asm_insn ("call\t%a2", xops);
4893 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4894 is what will be referenced by the Mach-O PIC subsystem. */
4896 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4899 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4900 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4903 output_asm_insn ("pop{l}\t%0", xops);
/* Thunk path: call the per-register get_pc thunk (emitted later by
   ix86_file_end) and record that this register needs one.  */
4908 get_pc_thunk_name (name, REGNO (dest));
4909 pic_labels_used |= 1 << REGNO (dest);
4911 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4912 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4913 output_asm_insn ("call\t%X2", xops);
4914 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4915 is what will be referenced by the Mach-O PIC subsystem. */
4918 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4920 targetm.asm_out.internal_label (asm_out_file, "L",
4921 CODE_LABEL_NUMBER (label));
/* Finally add the GOT offset to the PC value now held in DEST.  */
4928 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4929 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4931 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4936 /* Generate an "push" pattern for input ARG. */
/* Builds a SET storing ARG through a pre-decremented stack pointer.  */
4941 return gen_rtx_SET (VOIDmode,
4943 gen_rtx_PRE_DEC (Pmode,
4944 stack_pointer_rtx)),
4948 /* Return >= 0 if there is an unused call-clobbered register available
4949 for the entire function. */
4952 ix86_select_alt_pic_regnum (void)
/* Only leaf, non-profiled functions qualify; otherwise calls could
   clobber the chosen register.  */
4954 if (current_function_is_leaf && !current_function_profile
4955 && !ix86_current_function_calls_tls_descriptor)
/* Scan eax/edx/ecx (regnos 2..0) for one never touched.  */
4958 for (i = 2; i >= 0; --i)
4959 if (!regs_ever_live[i])
4963 return INVALID_REGNUM;
4966 /* Return 1 if we need to save REGNO. */
/* MAYBE_EH_RETURN additionally treats the EH return data registers as
   needing a save when the function calls __builtin_eh_return.  */
4968 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be preserved when it is genuinely used —
   unless an unused call-clobbered register can stand in for it.  */
4970 if (pic_offset_table_rtx
4971 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4972 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4973 || current_function_profile
4974 || current_function_calls_eh_return
4975 || current_function_uses_const_pool))
4977 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* EH return data registers: scan the list until INVALID_REGNUM.  */
4982 if (current_function_calls_eh_return && maybe_eh_return)
4987 unsigned test = EH_RETURN_DATA_REGNO (i);
4988 if (test == INVALID_REGNUM)
/* The register holding the forced-aligned argument pointer must
   survive as well.  */
4995 if (cfun->machine->force_align_arg_pointer
4996 && regno == REGNO (cfun->machine->force_align_arg_pointer))
/* Default rule: live, callee-saved, not fixed, and not the hard frame
   pointer when that is already maintained by the prologue.  */
4999 return (regs_ever_live[regno]
5000 && !call_used_regs[regno]
5001 && !fixed_regs[regno]
5002 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5005 /* Return number of registers to be saved on the stack. */
/* Counts every hard register for which ix86_save_reg is true.  */
5008 ix86_nsaved_regs (void)
5013 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5014 if (ix86_save_reg (regno, true))
5019 /* Return the offset between two registers, one to be eliminated, and the other
5020 its replacement, at the start of a routine. */
/* FROM is ARG_POINTER_REGNUM or FRAME_POINTER_REGNUM; TO is
   HARD_FRAME_POINTER_REGNUM or STACK_POINTER_REGNUM.  Offsets come
   from the computed frame layout.  */
5023 ix86_initial_elimination_offset (int from, int to)
5025 struct ix86_frame frame;
5026 ix86_compute_frame_layout (&frame);
5028 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5029 return frame.hard_frame_pointer_offset;
5030 else if (from == FRAME_POINTER_REGNUM
5031 && to == HARD_FRAME_POINTER_REGNUM)
5032 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining combinations must target the stack pointer.  */
5035 gcc_assert (to == STACK_POINTER_REGNUM);
5037 if (from == ARG_POINTER_REGNUM)
5038 return frame.stack_pointer_offset;
5040 gcc_assert (from == FRAME_POINTER_REGNUM);
5041 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5045 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes register-save count, paddings, red-zone use and the offsets
   of the frame/hard-frame/stack pointers for the current function.  */
5048 ix86_compute_frame_layout (struct ix86_frame *frame)
5050 HOST_WIDE_INT total_size;
5051 unsigned int stack_alignment_needed;
5052 HOST_WIDE_INT offset;
5053 unsigned int preferred_alignment;
5054 HOST_WIDE_INT size = get_frame_size ();
5056 frame->nregs = ix86_nsaved_regs ();
5059 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5060 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5062 /* During reload iteration the amount of registers saved can change.
5063 Recompute the value as needed. Do not recompute when amount of registers
5064 didn't change as reload does multiple calls to the function and does not
5065 expect the decision to change within single iteration. */
5067 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5069 int count = frame->nregs;
5071 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5072 /* The fast prologue uses move instead of push to save registers. This
5073 is significantly longer, but also executes faster as modern hardware
5074 can execute the moves in parallel, but can't do that for push/pop.
5076 Be careful about choosing what prologue to emit: When function takes
5077 many instructions to execute we may use slow version as well as in
5078 case function is known to be outside hot spot (this is known with
5079 feedback only). Weight the size of function by number of registers
5080 to save as it is cheap to use one or two push instructions but very
5081 slow to use many of them. */
5083 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5084 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5085 || (flag_branch_probabilities
5086 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5087 cfun->machine->use_fast_prologue_epilogue = false;
5089 cfun->machine->use_fast_prologue_epilogue
5090 = !expensive_function_p (count);
5092 if (TARGET_PROLOGUE_USING_MOVE
5093 && cfun->machine->use_fast_prologue_epilogue)
5094 frame->save_regs_using_mov = true;
5096 frame->save_regs_using_mov = false;
5099 /* Skip return address and saved base pointer. */
5100 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5102 frame->hard_frame_pointer_offset = offset;
5104 /* Do some sanity checking of stack_alignment_needed and
5105 preferred_alignment, since i386 port is the only using those features
5106 that may break easily. */
5108 gcc_assert (!size || stack_alignment_needed);
5109 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5110 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5111 gcc_assert (stack_alignment_needed
5112 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5114 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5115 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5117 /* Register save area */
5118 offset += frame->nregs * UNITS_PER_WORD;
/* Reserve the varargs register save area when needed.  */
5121 if (ix86_save_varrargs_registers)
5123 offset += X86_64_VARARGS_SIZE;
5124 frame->va_arg_size = X86_64_VARARGS_SIZE;
5127 frame->va_arg_size = 0;
5129 /* Align start of frame for local function. */
5130 frame->padding1 = ((offset + stack_alignment_needed - 1)
5131 & -stack_alignment_needed) - offset;
5133 offset += frame->padding1;
5135 /* Frame pointer points here. */
5136 frame->frame_pointer_offset = offset;
5140 /* Add outgoing arguments area. Can be skipped if we eliminated
5141 all the function calls as dead code.
5142 Skipping is however impossible when function calls alloca. Alloca
5143 expander assumes that last current_function_outgoing_args_size
5144 of stack frame are unused. */
5145 if (ACCUMULATE_OUTGOING_ARGS
5146 && (!current_function_is_leaf || current_function_calls_alloca
5147 || ix86_current_function_calls_tls_descriptor))
5149 offset += current_function_outgoing_args_size;
5150 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5153 frame->outgoing_arguments_size = 0;
5155 /* Align stack boundary. Only needed if we're calling another function
5157 if (!current_function_is_leaf || current_function_calls_alloca
5158 || ix86_current_function_calls_tls_descriptor)
5159 frame->padding2 = ((offset + preferred_alignment - 1)
5160 & -preferred_alignment) - offset;
5162 frame->padding2 = 0;
5164 offset += frame->padding2;
5166 /* We've reached end of stack frame. */
5167 frame->stack_pointer_offset = offset;
5169 /* Size prologue needs to allocate. */
5170 frame->to_allocate =
5171 (size + frame->padding1 + frame->padding2
5172 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Mov-based saves are not worthwhile for tiny frames, and are not
   usable for 64-bit frames beyond the 32-bit displacement range.  */
5174 if ((!frame->to_allocate && frame->nregs <= 1)
5175 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5176 frame->save_regs_using_mov = false;
/* Leaf functions may place (part of) the frame in the red zone below
   the stack pointer, avoiding an explicit allocation.  */
5178 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5179 && current_function_is_leaf
5180 && !ix86_current_function_calls_tls_descriptor)
5182 frame->red_zone_size = frame->to_allocate;
5183 if (frame->save_regs_using_mov)
5184 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5185 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5186 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5189 frame->red_zone_size = 0;
5190 frame->to_allocate -= frame->red_zone_size;
5191 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout.  */
5193 fprintf (stderr, "nregs: %i\n", frame->nregs);
5194 fprintf (stderr, "size: %i\n", size);
5195 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5196 fprintf (stderr, "padding1: %i\n", frame->padding1);
5197 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5198 fprintf (stderr, "padding2: %i\n", frame->padding2);
5199 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5200 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5201 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5202 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5203 frame->hard_frame_pointer_offset);
5204 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5208 /* Emit code to save registers in the prologue. */
/* Pushes every register that ix86_save_reg reports as needing a save,
   marking each insn frame-related for unwind info.  */
5211 ix86_emit_save_regs (void)
5216 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5217 if (ix86_save_reg (regno, true))
5219 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5220 RTX_FRAME_RELATED_P (insn) = 1;
5224 /* Emit code to save registers using MOV insns. First register
5225 is stored at POINTER + OFFSET. */
5227 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
/* Each saved register occupies one word; OFFSET advances accordingly.  */
5232 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5233 if (ix86_save_reg (regno, true))
5235 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5237 gen_rtx_REG (Pmode, regno));
5238 RTX_FRAME_RELATED_P (insn) = 1;
5239 offset += UNITS_PER_WORD;
5243 /* Expand prologue or epilogue stack adjustment.
5244 The pattern exists to put a dependency on all ebp-based memory accesses.
5245 STYLE should be negative if instructions should be marked as frame related,
5246 zero if %r11 register is live and cannot be freely used and positive
5250 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5255 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
/* 64-bit: use the immediate form only if OFFSET fits in 32 bits.  */
5256 else if (x86_64_immediate_operand (offset, DImode))
5257 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5261 /* r11 is used by indirect sibcall return as well, set before the
5262 epilogue and used after the epilogue. ATM indirect sibcall
5263 shouldn't be used together with huge frame sizes in one
5264 function because of the frame_size check in sibcall.c. */
/* Huge offset: stage it through r11, then add r11 to the pointer.  */
5266 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5267 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5269 RTX_FRAME_RELATED_P (insn) = 1;
5270 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5274 RTX_FRAME_RELATED_P (insn) = 1;
5277 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* Returns the rtx used to refer to incoming arguments.  When stack
   realignment is requested (for main, via -mstackrealign, or via the
   force_align_arg_pointer attribute), arguments are addressed through
   a dedicated register saved in cfun->machine.  */
5280 ix86_internal_arg_pointer (void)
5282 bool has_force_align_arg_pointer =
5283 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5284 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5285 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5286 && DECL_NAME (current_function_decl)
5287 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5288 && DECL_FILE_SCOPE_P (current_function_decl))
5289 || ix86_force_align_arg_pointer
5290 || has_force_align_arg_pointer)
5292 /* Nested functions can't realign the stack due to a register
5294 if (DECL_CONTEXT (current_function_decl)
5295 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
/* -mstackrealign is merely ignored; the explicit attribute is a
   hard error on nested functions.  */
5297 if (ix86_force_align_arg_pointer)
5298 warning (0, "-mstackrealign ignored for nested functions");
5299 if (has_force_align_arg_pointer)
5300 error ("%s not supported for nested functions",
5301 ix86_force_align_arg_pointer_string);
5302 return virtual_incoming_args_rtx;
/* Use register 2 (ecx) as the argument pointer for realignment.  */
5304 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5305 return copy_to_reg (cfun->machine->force_align_arg_pointer);
/* Default: normal virtual incoming-arguments pointer.  */
5308 return virtual_incoming_args_rtx;
5311 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5312 This is called from dwarf2out.c to emit call frame instructions
5313 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
/* LABEL names the point in the insn stream; PATTERN is the SET whose
   source is the UNSPEC being translated into CFI.  */
5315 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5317 rtx unspec = SET_SRC (pattern);
5318 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Register-save note: operand 0 is the saved register.  */
5322 case UNSPEC_REG_SAVE:
5323 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5324 SET_DEST (pattern));
/* CFA definition: destination register plus constant offset.  */
5326 case UNSPEC_DEF_CFA:
5327 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5328 INTVAL (XVECEXP (unspec, 0, 0)));
5335 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): this extract is missing lines (the embedded original line
   numbering is discontinuous); do not assume adjacent statements below were
   adjacent in the full file.  Visible work: optional stack re-alignment via
   force_align_arg_pointer (with hand-built unwind notes), frame-pointer
   push+move, callee-save spills (push or mov), stack allocation (direct
   sub or the Win32 probe worker), and the PIC register load.  */
5338 ix86_expand_prologue (void)
5342 struct ix86_frame frame;
5343 HOST_WIDE_INT allocate;
5345 ix86_compute_frame_layout (&frame);
/* Forced argument-pointer alignment path (-mstackrealign style).  */
5347 if (cfun->machine->force_align_arg_pointer)
5351 /* Grab the argument pointer. */
5352 x = plus_constant (stack_pointer_rtx, 4);
5353 y = cfun->machine->force_align_arg_pointer;
5354 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5355 RTX_FRAME_RELATED_P (insn) = 1;
5357 /* The unwind info consists of two parts: install the fafp as the cfa,
5358 and record the fafp as the "save register" of the stack pointer.
5359 The latter is there in order that the unwinder can see where it
5360 should restore the stack pointer across the 'and' (alignment) insn. */
5361 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5362 x = gen_rtx_SET (VOIDmode, y, x);
5363 RTX_FRAME_RELATED_P (x) = 1;
5364 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5366 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5367 RTX_FRAME_RELATED_P (y) = 1;
5368 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5369 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5370 REG_NOTES (insn) = x;
5372 /* Align the stack. */
5373 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5376 /* And here we cheat like madmen with the unwind info. We force the
5377 cfa register back to sp+4, which is exactly what it was at the
5378 start of the function. Re-pushing the return address results in
5379 the return at the same spot relative to the cfa, and thus is
5380 correct wrt the unwind info. */
5381 x = cfun->machine->force_align_arg_pointer;
5382 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5383 insn = emit_insn (gen_push (x));
5384 RTX_FRAME_RELATED_P (insn) = 1;
5387 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5388 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5389 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5390 REG_NOTES (insn) = x;
5393 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5394 slower on all targets. Also sdb doesn't like it. */
5396 if (frame_pointer_needed)
5398 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5399 RTX_FRAME_RELATED_P (insn) = 1;
5401 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5402 RTX_FRAME_RELATED_P (insn) = 1;
5405 allocate = frame.to_allocate;
/* Push-based saves happen here; mov-based saves are deferred so the
   allocation can cover them (the saved bytes are folded into ALLOCATE).  */
5407 if (!frame.save_regs_using_mov)
5408 ix86_emit_save_regs ();
5410 allocate += frame.nregs * UNITS_PER_WORD;
5412 /* When using red zone we may start register saving before allocating
5413 the stack frame saving one cycle of the prologue. */
5414 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5415 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5416 : stack_pointer_rtx,
5417 -frame.nregs * UNITS_PER_WORD);
/* Small/unprobed allocations: a single stack adjustment.  */
5421 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5422 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5423 GEN_INT (-allocate), -1);
/* Large allocation with stack probing (alloca-style worker in %eax).  */
5426 /* Only valid for Win32. */
5427 rtx eax = gen_rtx_REG (SImode, 0);
5428 bool eax_live = ix86_eax_live_at_start_p ();
5431 gcc_assert (!TARGET_64BIT);
/* %eax holds an incoming value (e.g. a register arg) — preserve it.  */
5435 emit_insn (gen_push (eax));
5439 emit_move_insn (eax, GEN_INT (allocate));
5441 insn = emit_insn (gen_allocate_stack_worker (eax));
5442 RTX_FRAME_RELATED_P (insn) = 1;
/* The worker's RTL doesn't look like an SP adjustment, so attach an
   explicit frame-related note describing the net effect for unwind info.  */
5443 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5444 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5445 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5446 t, REG_NOTES (insn));
/* Reload the saved %eax from its slot on the (now allocated) frame.  */
5450 if (frame_pointer_needed)
5451 t = plus_constant (hard_frame_pointer_rtx,
5454 - frame.nregs * UNITS_PER_WORD);
5456 t = plus_constant (stack_pointer_rtx, allocate);
5457 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
/* Non-red-zone mov-based saves, after allocation.  */
5461 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5463 if (!frame_pointer_needed || !frame.to_allocate)
5464 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5466 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5467 -frame.nregs * UNITS_PER_WORD);
/* Materialize the PIC register if the function needs it (or profiling).  */
5470 pic_reg_used = false;
5471 if (pic_offset_table_rtx
5472 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5473 || current_function_profile))
5475 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5477 if (alt_pic_reg_used != INVALID_REGNUM)
5478 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5480 pic_reg_used = true;
5486 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5488 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5490 /* Even with accurate pre-reload life analysis, we can wind up
5491 deleting all references to the pic register after reload.
5492 Consider if cross-jumping unifies two sides of a branch
5493 controlled by a comparison vs the only read from a global.
5494 In which case, allow the set_got to be deleted, though we're
5495 too late to do anything about the ebx save in the prologue. */
5496 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5499 /* Prevent function calls from being scheduled before the call to mcount.
5500 In the pic_reg_used case, make sure that the got load isn't deleted. */
5501 if (current_function_profile)
5502 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5505 /* Emit code to restore saved registers using MOV insns. First register
5506 is restored from POINTER + OFFSET.  MAYBE_EH_RETURN is forwarded to
     ix86_save_reg to select the eh_return register set.  Successive
     registers are loaded at OFFSET increments of UNITS_PER_WORD. */
5508 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5509 int maybe_eh_return)
5512 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5514 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5515 if (ix86_save_reg (regno, maybe_eh_return))
5517 /* Ensure that adjust_address won't be forced to produce pointer
5518 out of range allowed by x86-64 instruction set (displacements are
     sign-extended 32-bit); materialize the address in r11 instead. */
5519 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5523 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5524 emit_move_insn (r11, GEN_INT (offset));
5525 emit_insn (gen_adddi3 (r11, r11, pointer));
5526 base_address = gen_rtx_MEM (Pmode, r11);
5529 emit_move_insn (gen_rtx_REG (Pmode, regno),
5530 adjust_address (base_address, Pmode, offset));
5531 offset += UNITS_PER_WORD;
5535 /* Restore function stack, frame, and registers. */
/* NOTE(review): lines are missing from this extract (discontinuous
   embedded numbering).  STYLE appears to encode the epilogue flavor
   (style == 2 is the eh_return path; sibcall epilogues skip the return
   insn) — confirm against the full file.  */
5538 ix86_expand_epilogue (int style)
5541 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5542 struct ix86_frame frame;
5543 HOST_WIDE_INT offset;
5545 ix86_compute_frame_layout (&frame);
5547 /* Calculate start of saved registers relative to ebp. Special care
5548 must be taken for the normal return case of a function using
5549 eh_return: the eax and edx registers are marked as saved, but not
5550 restored along this path. */
5551 offset = frame.nregs;
5552 if (current_function_calls_eh_return && style != 2)
5554 offset *= -UNITS_PER_WORD;
5556 /* If we're only restoring one register and sp is not valid then
5557 using a move instruction to restore the register since it's
5558 less work than reloading sp and popping the register.
5560 The default code results in stack adjustment using add/lea instruction,
5561 while this code results in LEAVE instruction (or discrete equivalent),
5562 so it is profitable in some other cases as well. Especially when there
5563 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5564 and there is exactly one register to pop. This heuristic may need some
5565 tuning in future. */
5566 if ((!sp_valid && frame.nregs <= 1)
5567 || (TARGET_EPILOGUE_USING_MOVE
5568 && cfun->machine->use_fast_prologue_epilogue
5569 && (frame.nregs > 1 || frame.to_allocate))
5570 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5571 || (frame_pointer_needed && TARGET_USE_LEAVE
5572 && cfun->machine->use_fast_prologue_epilogue
5573 && frame.nregs == 1)
5574 || current_function_calls_eh_return)
5576 /* Restore registers. We can use ebp or esp to address the memory
5577 locations. If both are available, default to ebp, since offsets
5578 are known to be small. Only exception is esp pointing directly to the
5579 end of block of saved registers, where we may simplify addressing
5582 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5583 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5584 frame.to_allocate, style == 2);
5586 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5587 offset, style == 2);
5589 /* eh_return epilogues need %ecx added to the stack pointer. */
5592 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5594 if (frame_pointer_needed)
/* With a frame pointer: compute the new SP past the saved return
   address, then restore %ebp from its slot before adjusting SP.  */
5596 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5597 tmp = plus_constant (tmp, UNITS_PER_WORD);
5598 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5600 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5601 emit_move_insn (hard_frame_pointer_rtx, tmp);
5603 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5608 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5609 tmp = plus_constant (tmp, (frame.to_allocate
5610 + frame.nregs * UNITS_PER_WORD));
5611 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5614 else if (!frame_pointer_needed)
5615 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5616 GEN_INT (frame.to_allocate
5617 + frame.nregs * UNITS_PER_WORD),
5619 /* If not an i386, mov & pop is faster than "leave". */
5620 else if (TARGET_USE_LEAVE || optimize_size
5621 || !cfun->machine->use_fast_prologue_epilogue)
5622 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5625 pro_epilogue_adjust_stack (stack_pointer_rtx,
5626 hard_frame_pointer_rtx,
5629 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5631 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Default (pop-based) epilogue path.  */
5636 /* First step is to deallocate the stack frame so that we can
5637 pop the registers. */
5640 gcc_assert (frame_pointer_needed);
5641 pro_epilogue_adjust_stack (stack_pointer_rtx,
5642 hard_frame_pointer_rtx,
5643 GEN_INT (offset), style);
5645 else if (frame.to_allocate)
5646 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5647 GEN_INT (frame.to_allocate), style);
5649 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5650 if (ix86_save_reg (regno, false))
5653 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5655 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5657 if (frame_pointer_needed)
5659 /* Leave results in shorter dependency chains on CPUs that are
5660 able to grok it fast. */
5661 if (TARGET_USE_LEAVE)
5662 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5663 else if (TARGET_64BIT)
5664 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5666 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Undo the prologue's forced stack alignment.  */
5670 if (cfun->machine->force_align_arg_pointer)
5672 emit_insn (gen_addsi3 (stack_pointer_rtx,
5673 cfun->machine->force_align_arg_pointer,
5677 /* Sibcall epilogues don't want a return instruction. */
5681 if (current_function_pops_args && current_function_args_size)
5683 rtx popc = GEN_INT (current_function_pops_args);
5685 /* i386 can only pop 64K bytes. If asked to pop more, pop
5686 return address, do explicit add, and jump indirectly to the
5689 if (current_function_pops_args >= 65536)
5691 rtx ecx = gen_rtx_REG (SImode, 2);
5693 /* There is no "pascal" calling convention in 64bit ABI. */
5694 gcc_assert (!TARGET_64BIT);
5696 emit_insn (gen_popsi1 (ecx));
5697 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5698 emit_jump_insn (gen_return_indirect_internal (ecx));
5701 emit_jump_insn (gen_return_pop_internal (popc));
5704 emit_jump_insn (gen_return_internal ());
5707 /* Reset from the function's potential modifications.  Restores the PIC
     register number (the prologue may have retargeted pic_offset_table_rtx
     to an alternate register), and on Mach-O emits a trailing NOP when the
     function might otherwise end with a label. */
5710 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5711 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5713 if (pic_offset_table_rtx)
5714 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5716 /* Mach-O doesn't support labels at the end of objects, so if
5717 it looks like we might want one, insert a NOP. */
5719 rtx insn = get_last_insn ();
5722 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5723 insn = PREV_INSN (insn);
5727 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5728 fputs ("\tnop\n", file);
5734 /* Extract the parts of an RTL expression that is a valid memory address
5735 for an instruction. Return 0 if the structure of the address is
5736 grossly off. Return -1 if the address contains ASHIFT, so it is not
5737 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): fills *OUT with base/index/scale/disp/seg.  Lines are
   missing from this extract, notably the PLUS-flattening loop body. */
5740 ix86_decompose_address (rtx addr, struct ix86_address *out)
5742 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5743 rtx base_reg, index_reg;
5744 HOST_WIDE_INT scale = 1;
5745 rtx scale_rtx = NULL_RTX;
5747 enum ix86_address_seg seg = SEG_DEFAULT;
5749 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5751 else if (GET_CODE (addr) == PLUS)
/* Flatten the PLUS tree into an operand list, then classify each addend.  */
5761 addends[n++] = XEXP (op, 1);
5764 while (GET_CODE (op) == PLUS);
5769 for (i = n; i >= 0; --i)
5772 switch (GET_CODE (op))
5777 index = XEXP (op, 0);
5778 scale_rtx = XEXP (op, 1);
/* UNSPEC_TP addend selects a TLS segment override (%fs/%gs).  */
5782 if (XINT (op, 1) == UNSPEC_TP
5783 && TARGET_TLS_DIRECT_SEG_REFS
5784 && seg == SEG_DEFAULT)
5785 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5814 else if (GET_CODE (addr) == MULT)
5816 index = XEXP (addr, 0); /* index*scale */
5817 scale_rtx = XEXP (addr, 1);
5819 else if (GET_CODE (addr) == ASHIFT)
5823 /* We're called for lea too, which implements ashift on occasion. */
5824 index = XEXP (addr, 0);
5825 tmp = XEXP (addr, 1);
5826 if (GET_CODE (tmp) != CONST_INT)
5828 scale = INTVAL (tmp);
/* Shift count must be 0..3 (scale 1/2/4/8 after 1 << count).  */
5829 if ((unsigned HOST_WIDE_INT) scale > 3)
5835 disp = addr; /* displacement */
5837 /* Extract the integral value of scale. */
5840 if (GET_CODE (scale_rtx) != CONST_INT)
5842 scale = INTVAL (scale_rtx);
5845 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5846 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5848 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5849 if (base_reg && index_reg && scale == 1
5850 && (index_reg == arg_pointer_rtx
5851 || index_reg == frame_pointer_rtx
5852 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* %esp can never be an index; swap the roles so it becomes the base.  */
5855 tmp = base, base = index, index = tmp;
5856 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5859 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5860 if ((base_reg == hard_frame_pointer_rtx
5861 || base_reg == frame_pointer_rtx
5862 || base_reg == arg_pointer_rtx) && !disp)
5865 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5866 Avoid this by transforming to [%esi+0]. */
5867 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5868 && base_reg && !index_reg && !disp
5870 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5873 /* Special case: encode reg+reg instead of reg*2. */
5874 if (!base && index && scale && scale == 2)
5875 base = index, base_reg = index_reg, scale = 1;
5877 /* Special case: scaling cannot be encoded without base or displacement. */
5878 if (!base && !disp && index && scale != 1)
5890 /* Return cost of the memory address x.
5891 For i386, it is better to use a complex address than let gcc copy
5892 the address into a reg and make a new pseudo. But not if the address
5893 requires two regs - that would mean more pseudos with longer
/* NOTE(review): lines are missing from this extract; the cost variable
   declaration/initialization and final return are not visible here.  */
5896 ix86_address_cost (rtx x)
5898 struct ix86_address parts;
5900 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the REG_P / REGNO tests below see the inner reg.  */
5904 if (parts.base && GET_CODE (parts.base) == SUBREG)
5905 parts.base = SUBREG_REG (parts.base);
5906 if (parts.index && GET_CODE (parts.index) == SUBREG)
5907 parts.index = SUBREG_REG (parts.index);
5909 /* More complex memory references are better. */
5910 if (parts.disp && parts.disp != const0_rtx)
5912 if (parts.seg != SEG_DEFAULT)
5915 /* Attempt to minimize number of registers in the address. */
5917 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5919 && (!REG_P (parts.index)
5920 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5924 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5926 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5927 && parts.base != parts.index)
5930 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5931 since its predecode logic can't detect the length of instructions
5932 and it degenerates to vector decoded. Increase cost of such
5933 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5934 to split such addresses or even refuse such addresses at all.
5936 Following addressing modes are affected:
5941 The first and last case may be avoidable by explicitly coding the zero in
5942 memory address, but I don't have AMD-K6 machine handy to check this
5946 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5947 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5948 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5954 /* If X is a machine specific address (i.e. a symbol or label being
5955 referenced as a displacement from the GOT implemented using an
5956 UNSPEC), then return the base term. Otherwise return X. */
5959 ix86_find_base_term (rtx x)
/* 64-bit case (guard not visible in this extract — TODO confirm):
   drill through CONST (PLUS sym const_int) wrapping UNSPEC_GOTPCREL.  */
5965 if (GET_CODE (x) != CONST)
5968 if (GET_CODE (term) == PLUS
5969 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5970 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5971 term = XEXP (term, 0);
5972 if (GET_CODE (term) != UNSPEC
5973 || XINT (term, 1) != UNSPEC_GOTPCREL)
5976 term = XVECEXP (term, 0, 0);
5978 if (GET_CODE (term) != SYMBOL_REF
5979 && GET_CODE (term) != LABEL_REF)
/* Fallback: let delegitimization recover the underlying symbol.  */
5985 term = ix86_delegitimize_address (x);
5987 if (GET_CODE (term) != SYMBOL_REF
5988 && GET_CODE (term) != LABEL_REF)
5994 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5995 this is used to form addresses to local data when -fPIC is in
/* Returns nonzero iff DISP is a (sym_or_label - "<pic base>") MINUS.  */
5999 darwin_local_data_pic (rtx disp)
6001 if (GET_CODE (disp) == MINUS)
6003 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6004 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6005 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6007 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6008 if (! strcmp (sym_name, "<pic base>"))
6016 /* Determine if a given RTX is a valid constant. We already know this
6017 satisfies CONSTANT_P. */
6020 legitimate_constant_p (rtx x)
6022 switch (GET_CODE (x))
/* CONST case: peel the wrapper and validate what is inside.  */
6027 if (GET_CODE (x) == PLUS)
6029 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6034 if (TARGET_MACHO && darwin_local_data_pic (x))
6037 /* Only some unspecs are valid as "constants". */
6038 if (GET_CODE (x) == UNSPEC)
6039 switch (XINT (x, 1))
6042 return TARGET_64BIT;
/* TPOFF-style unspecs are only constant for the matching TLS model.  */
6045 x = XVECEXP (x, 0, 0);
6046 return (GET_CODE (x) == SYMBOL_REF
6047 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6049 x = XVECEXP (x, 0, 0);
6050 return (GET_CODE (x) == SYMBOL_REF
6051 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6056 /* We must have drilled down to a symbol. */
6057 if (GET_CODE (x) == LABEL_REF)
6059 if (GET_CODE (x) != SYMBOL_REF)
6064 /* TLS symbols are never valid. */
6065 if (SYMBOL_REF_TLS_MODEL (x))
/* TImode / vector constants: only zero (and whatever the elided lines
   allow — not visible in this extract) are handled directly.  */
6070 if (GET_MODE (x) == TImode
6071 && x != CONST0_RTX (TImode)
6077 if (x == CONST0_RTX (GET_MODE (x)))
6085 /* Otherwise we handle everything else in the move patterns. */
6089 /* Determine if it's legal to put X into the constant pool. This
6090 is not possible for the address of thread-local symbols, which
6091 is checked above. */
6094 ix86_cannot_force_const_mem (rtx x)
6096 /* We can always put integral constants and vectors in memory. */
6097 switch (GET_CODE (x))
/* Everything else: forbidden exactly when it is not a legitimate
   constant (e.g. TLS-model symbols). */
6107 return !legitimate_constant_p (x);
6110 /* Determine if a given RTX is a valid constant address, i.e. a constant
     that is also a legitimate (strict) memory address. */
6113 constant_address_p (rtx x)
6115 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6118 /* Nonzero if the constant value X is a legitimate general operand
6119 when generating PIC code. It is given that flag_pic is on and
6120 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6123 legitimate_pic_operand_p (rtx x)
6127 switch (GET_CODE (x))
/* CONST: look through an optional (PLUS inner const_int) wrapper.  */
6130 inner = XEXP (x, 0);
6131 if (GET_CODE (inner) == PLUS
6132 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6133 inner = XEXP (inner, 0);
6135 /* Only some unspecs are valid as "constants". */
6136 if (GET_CODE (inner) == UNSPEC)
6137 switch (XINT (inner, 1))
6140 return TARGET_64BIT;
6142 x = XVECEXP (inner, 0, 0);
6143 return (GET_CODE (x) == SYMBOL_REF
6144 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* Symbolic operands fall back to the displacement check.  */
6152 return legitimate_pic_address_disp_p (x);
6159 /* Determine if a given CONST RTX is a valid memory displacement
     in PIC mode. */
6163 legitimate_pic_address_disp_p (rtx disp)
6167 /* In 64bit mode we can allow direct addresses of symbols and labels
6168 when they are not dynamic symbols. */
6171 rtx op0 = disp, op1;
6173 switch (GET_CODE (disp))
/* CONST (sym + small offset): only offsets within +/-16MB, and only
   for local, near symbols; TLS symbols must be wrapped in UNSPEC.  */
6179 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6181 op0 = XEXP (XEXP (disp, 0), 0);
6182 op1 = XEXP (XEXP (disp, 0), 1);
6183 if (GET_CODE (op1) != CONST_INT
6184 || INTVAL (op1) >= 16*1024*1024
6185 || INTVAL (op1) < -16*1024*1024)
6187 if (GET_CODE (op0) == LABEL_REF)
6189 if (GET_CODE (op0) != SYMBOL_REF)
6194 /* TLS references should always be enclosed in UNSPEC. */
6195 if (SYMBOL_REF_TLS_MODEL (op0))
6197 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6205 if (GET_CODE (disp) != CONST)
6207 disp = XEXP (disp, 0);
/* 64-bit: only GOTPCREL/GOTOFF unspecs survive here.  */
6211 /* It is unsafe to allow PLUS expressions.  This limits the allowed
6212 distance of GOT tables. We should not need these anyway. */
6213 if (GET_CODE (disp) != UNSPEC
6214 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6215 && XINT (disp, 1) != UNSPEC_GOTOFF))
6218 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6219 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit: allow (unspec + const_int) with the offset stripped.  */
6225 if (GET_CODE (disp) == PLUS)
6227 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6229 disp = XEXP (disp, 0);
6233 if (TARGET_MACHO && darwin_local_data_pic (disp))
6236 if (GET_CODE (disp) != UNSPEC)
6239 switch (XINT (disp, 1))
6244 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6246 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6247 While the ABI also specifies a 32bit relocation, we don't produce it in
6248 small PIC model at all. */
6249 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6250 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6252 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: require the symbol's TLS model to match the unspec.  */
6254 case UNSPEC_GOTTPOFF:
6255 case UNSPEC_GOTNTPOFF:
6256 case UNSPEC_INDNTPOFF:
6259 disp = XVECEXP (disp, 0, 0);
6260 return (GET_CODE (disp) == SYMBOL_REF
6261 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6263 disp = XVECEXP (disp, 0, 0);
6264 return (GET_CODE (disp) == SYMBOL_REF
6265 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6267 disp = XVECEXP (disp, 0, 0);
6268 return (GET_CODE (disp) == SYMBOL_REF
6269 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6275 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6276 memory address for an instruction. The MODE argument is the machine mode
6277 for the MEM expression that wants to use this address.
6279 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6280 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): lines are missing from this extract (the success/failure
   labels and several guards are not visible).  Structure: decompose, then
   validate base, index, scale, and displacement in turn; any failure sets
   REASON and jumps to the error path. */
6284 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6286 struct ix86_address parts;
6287 rtx base, index, disp;
6288 HOST_WIDE_INT scale;
6289 const char *reason = NULL;
6290 rtx reason_rtx = NULL_RTX;
6292 if (TARGET_DEBUG_ADDR)
6295 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6296 GET_MODE_NAME (mode), strict);
6300 if (ix86_decompose_address (addr, &parts) <= 0)
6302 reason = "decomposition failed";
6307 index = parts.index;
6309 scale = parts.scale;
6311 /* Validate base register.
6313 Don't allow SUBREG's that span more than a word here. It can lead to spill
6314 failures when the base is one word out of a two word structure, which is
6315 represented internally as a DImode int. */
6324 else if (GET_CODE (base) == SUBREG
6325 && REG_P (SUBREG_REG (base))
6326 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6328 reg = SUBREG_REG (base);
6331 reason = "base is not a register";
6335 if (GET_MODE (base) != Pmode)
6337 reason = "base is not in Pmode";
6341 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6342 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6344 reason = "base is not valid";
6349 /* Validate index register.
6351 Don't allow SUBREG's that span more than a word here -- same as above. */
6360 else if (GET_CODE (index) == SUBREG
6361 && REG_P (SUBREG_REG (index))
6362 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6364 reg = SUBREG_REG (index);
6367 reason = "index is not a register";
6371 if (GET_MODE (index) != Pmode)
6373 reason = "index is not in Pmode";
6377 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6378 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6380 reason = "index is not valid";
6385 /* Validate scale factor. */
6388 reason_rtx = GEN_INT (scale);
6391 reason = "scale without index";
6395 if (scale != 2 && scale != 4 && scale != 8)
6397 reason = "scale is not a valid multiplier";
6402 /* Validate displacement. */
6407 if (GET_CODE (disp) == CONST
6408 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6409 switch (XINT (XEXP (disp, 0), 1))
6411 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6412 used. While the ABI also specifies 32bit relocations, we don't produce
6413 them at all and use IP relative instead. */
6416 gcc_assert (flag_pic);
6418 goto is_legitimate_pic;
6419 reason = "64bit address unspec";
6422 case UNSPEC_GOTPCREL:
6423 gcc_assert (flag_pic);
6424 goto is_legitimate_pic;
6426 case UNSPEC_GOTTPOFF:
6427 case UNSPEC_GOTNTPOFF:
6428 case UNSPEC_INDNTPOFF:
6434 reason = "invalid address unspec";
6438 else if (SYMBOLIC_CONST (disp)
6442 && MACHOPIC_INDIRECT
6443 && !machopic_operand_p (disp)
/* PIC-mode displacement validation.  */
6449 if (TARGET_64BIT && (index || base))
6451 /* foo@dtpoff(%rX) is ok. */
6452 if (GET_CODE (disp) != CONST
6453 || GET_CODE (XEXP (disp, 0)) != PLUS
6454 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6455 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6456 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6457 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6459 reason = "non-constant pic memory reference";
6463 else if (! legitimate_pic_address_disp_p (disp))
6465 reason = "displacement is an invalid pic construct";
6469 /* This code used to verify that a symbolic pic displacement
6470 includes the pic_offset_table_rtx register.
6472 While this is good idea, unfortunately these constructs may
6473 be created by "adds using lea" optimization for incorrect
6482 This code is nonsensical, but results in addressing
6483 GOT table with pic_offset_table_rtx base. We can't
6484 just refuse it easily, since it gets matched by
6485 "addsi3" pattern, that later gets split to lea in the
6486 case output register differs from input. While this
6487 can be handled by separate addsi pattern for this case
6488 that never results in lea, this seems to be easier and
6489 correct fix for crash to disable this test. */
/* Non-PIC: displacement must simply be a legitimate constant.  */
6491 else if (GET_CODE (disp) != LABEL_REF
6492 && GET_CODE (disp) != CONST_INT
6493 && (GET_CODE (disp) != CONST
6494 || !legitimate_constant_p (disp))
6495 && (GET_CODE (disp) != SYMBOL_REF
6496 || !legitimate_constant_p (disp)))
6498 reason = "displacement is not constant";
6501 else if (TARGET_64BIT
6502 && !x86_64_immediate_operand (disp, VOIDmode))
6504 reason = "displacement is out of range";
6509 /* Everything looks valid. */
6510 if (TARGET_DEBUG_ADDR)
6511 fprintf (stderr, "Success.\n");
6515 if (TARGET_DEBUG_ADDR)
6517 fprintf (stderr, "Error: %s\n", reason);
6518 debug_rtx (reason_rtx);
6523 /* Return a unique alias set for the GOT.  Lazily created on first use
     and cached in a function-local static. */
6525 static HOST_WIDE_INT
6526 ix86_GOT_alias_set (void)
6528 static HOST_WIDE_INT set = -1;
6530 set = new_alias_set ();
6534 /* Return a legitimate reference for ORIG (an address) using the
6535 register REG. If REG is 0, a new pseudo is generated.
6537 There are two types of references that must be handled:
6539 1. Global data references must load the address from the GOT, via
6540 the PIC reg. An insn is emitted to do this load, and the reg is
6543 2. Static data references, constant pool addresses, and code labels
6544 compute the address as an offset from the GOT, whose base is in
6545 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6546 differentiate them from global data objects. The returned
6547 address is the PIC reg + an unspec constant.
6549 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6550 reg also appears in the address. */
/* NOTE(review): lines are missing from this extract; several branch
   guards (e.g. the medium-PIC vs. GOT-load split) are not fully visible. */
6553 legitimize_pic_address (rtx orig, rtx reg)
6560 if (TARGET_MACHO && !TARGET_64BIT)
6563 reg = gen_reg_rtx (Pmode);
6564 /* Use the generic Mach-O PIC machinery. */
6565 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6569 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6571 else if (TARGET_64BIT
6572 && ix86_cmodel != CM_SMALL_PIC
6573 && local_symbolic_operand (addr, Pmode))
6576 /* This symbol may be referenced via a displacement from the PIC
6577 base address (@GOTOFF). */
6579 if (reload_in_progress)
6580 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6581 if (GET_CODE (addr) == CONST)
6582 addr = XEXP (addr, 0);
6583 if (GET_CODE (addr) == PLUS)
6585 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6586 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6589 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6590 new = gen_rtx_CONST (Pmode, new);
/* 64-bit GOTOFF: the offset is materialized in a temp and added to
   the PIC register (or combined into REG when one was supplied).  */
6592 tmpreg = gen_reg_rtx (Pmode);
6595 emit_move_insn (tmpreg, new);
6599 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6600 tmpreg, 1, OPTAB_DIRECT);
6603 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6605 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6607 /* This symbol may be referenced via a displacement from the PIC
6608 base address (@GOTOFF). */
6610 if (reload_in_progress)
6611 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6612 if (GET_CODE (addr) == CONST)
6613 addr = XEXP (addr, 0);
6614 if (GET_CODE (addr) == PLUS)
6616 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6617 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6620 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6621 new = gen_rtx_CONST (Pmode, new);
6622 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6626 emit_move_insn (reg, new);
/* Non-TLS symbol: load the address from the GOT (@GOTPCREL on 64-bit).  */
6630 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6634 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6635 new = gen_rtx_CONST (Pmode, new);
6636 new = gen_const_mem (Pmode, new);
6637 set_mem_alias_set (new, ix86_GOT_alias_set ());
6640 reg = gen_reg_rtx (Pmode);
6641 /* Use directly gen_movsi, otherwise the address is loaded
6642 into register for CSE. We don't want to CSE these addresses,
6643 instead we CSE addresses from the GOT table, so skip this. */
6644 emit_insn (gen_movsi (reg, new));
6649 /* This symbol must be referenced via a load from the
6650 Global Offset Table (@GOT). */
6652 if (reload_in_progress)
6653 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6654 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6655 new = gen_rtx_CONST (Pmode, new);
6656 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6657 new = gen_const_mem (Pmode, new);
6658 set_mem_alias_set (new, ix86_GOT_alias_set ());
6661 reg = gen_reg_rtx (Pmode);
6662 emit_move_insn (reg, new);
/* Non-symbolic addresses: constants, CONST wrappers, and PLUS trees.  */
6668 if (GET_CODE (addr) == CONST_INT
6669 && !x86_64_immediate_operand (addr, VOIDmode))
6673 emit_move_insn (reg, addr);
6677 new = force_reg (Pmode, addr);
6679 else if (GET_CODE (addr) == CONST)
6681 addr = XEXP (addr, 0);
6683 /* We must match stuff we generate before. Assume the only
6684 unspecs that can get here are ours. Not that we could do
6685 anything with them anyway.... */
6686 if (GET_CODE (addr) == UNSPEC
6687 || (GET_CODE (addr) == PLUS
6688 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6690 gcc_assert (GET_CODE (addr) == PLUS);
6692 if (GET_CODE (addr) == PLUS)
6694 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6696 /* Check first to see if this is a constant offset from a @GOTOFF
6697 symbol reference. */
6698 if (local_symbolic_operand (op0, Pmode)
6699 && GET_CODE (op1) == CONST_INT)
6703 if (reload_in_progress)
6704 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6705 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6707 new = gen_rtx_PLUS (Pmode, new, op1);
6708 new = gen_rtx_CONST (Pmode, new);
6709 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6713 emit_move_insn (reg, new);
/* 64-bit sym+offset beyond +/-16MB: rebuild with registers.  */
6719 if (INTVAL (op1) < -16*1024*1024
6720 || INTVAL (op1) >= 16*1024*1024)
6722 if (!x86_64_immediate_operand (op1, Pmode))
6723 op1 = force_reg (Pmode, op1);
6724 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine.  */
6730 base = legitimize_pic_address (XEXP (addr, 0), reg);
6731 new = legitimize_pic_address (XEXP (addr, 1),
6732 base == reg ? NULL_RTX : reg);
6734 if (GET_CODE (new) == CONST_INT)
6735 new = plus_constant (base, INTVAL (new));
6738 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6740 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6741 new = XEXP (new, 1);
6743 new = gen_rtx_PLUS (Pmode, base, new);
6751 /* Load the thread pointer. If TO_REG is true, force it into a register.
     Returns either the raw UNSPEC_TP rtx or a pseudo holding it. */
6754 get_thread_pointer (int to_reg)
6758 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6762 reg = gen_reg_rtx (Pmode);
6763 insn = gen_rtx_SET (VOIDmode, reg, tp);
6764 insn = emit_insn (insn);
6769 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6770 false if we expect this to be used for a memory address and true if
6771 we expect to load the address into a register. */
/* Legitimize the TLS symbol X according to the access MODEL
   (global-dynamic, local-dynamic, initial-exec or local-exec).
   NOTE(review): this excerpt is elided; local declarations, the switch
   header, braces and the return statements are not all visible.  */
6774 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6776 rtx dest, base, off, pic, tp;
/* Global-dynamic: address computed at runtime via a call (tls_get_addr
   on 32-bit / classic 64-bit, or the GNU2 TLSDESC scheme).  */
6781 case TLS_MODEL_GLOBAL_DYNAMIC:
6782 dest = gen_reg_rtx (Pmode);
/* Under GNU2 TLS the descriptor yields an offset that must later be
   added to the thread pointer, so fetch TP up front.  */
6783 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6785 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
/* Classic 64-bit GD: the call returns the address in %rax (hard reg 0);
   wrap the sequence as a libcall block equivalent to X.  */
6787 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6790 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6791 insns = get_insns ();
6794 emit_libcall_block (insns, dest, rax, x);
6796 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6797 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6799 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6801 if (TARGET_GNU2_TLS)
/* GNU2: DEST currently holds an offset; the real address is TP + DEST.
   Record X as the equivalent value for the benefit of CSE/debug.  */
6803 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6805 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Local-dynamic: one call obtains the module base, then each symbol is
   addressed as base + @DTPOFF offset.  */
6809 case TLS_MODEL_LOCAL_DYNAMIC:
6810 base = gen_reg_rtx (Pmode);
6811 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6813 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6815 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6818 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6819 insns = get_insns ();
/* Attach a REG_EQUAL-style note describing the call as
   tls_get_addr(const0) so the block can be CSEd per module base.  */
6822 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6823 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6824 emit_libcall_block (insns, base, rax, note);
6826 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6827 emit_insn (gen_tls_local_dynamic_base_64 (base));
6829 emit_insn (gen_tls_local_dynamic_base_32 (base));
6831 if (TARGET_GNU2_TLS)
/* GNU2: BASE is an offset from TP; note it as (module_base - TP).
   This X deliberately shadows the parameter for the note only.  */
6833 rtx x = ix86_tls_module_base ();
6835 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6836 gen_rtx_MINUS (Pmode, x, tp));
/* Symbol address = module base + @DTPOFF(x).  */
6839 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6840 off = gen_rtx_CONST (Pmode, off);
6842 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6844 if (TARGET_GNU2_TLS)
6846 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6848 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Initial-exec: the TP offset is loaded from the GOT; which relocation
   (GOTTPOFF/GOTNTPOFF/INDNTPOFF) depends on target and PIC state.  */
6853 case TLS_MODEL_INITIAL_EXEC:
6857 type = UNSPEC_GOTNTPOFF;
6861 if (reload_in_progress)
/* Reload cannot set up the PIC register itself; mark it live.  */
6862 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6863 pic = pic_offset_table_rtx;
6864 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6866 else if (!TARGET_ANY_GNU_TLS)
/* Non-PIC Sun-style TLS still needs a GOT pointer; build one here.  */
6868 pic = gen_reg_rtx (Pmode);
6869 emit_insn (gen_set_got (pic));
6870 type = UNSPEC_GOTTPOFF;
6875 type = UNSPEC_INDNTPOFF;
/* Load the offset from the (possibly PIC-relative) GOT slot; the slot
   is constant, so use a const mem in the GOT alias set.  */
6878 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6879 off = gen_rtx_CONST (Pmode, off);
6881 off = gen_rtx_PLUS (Pmode, pic, off);
6882 off = gen_const_mem (Pmode, off);
6883 set_mem_alias_set (off, ix86_GOT_alias_set ());
6885 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
/* GNU/64-bit: address is TP + offset; keep TP in a register unless a
   direct %fs/%gs segment reference is allowed for a plain memory use.  */
6887 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6888 off = force_reg (Pmode, off);
6889 return gen_rtx_PLUS (Pmode, base, off);
/* Sun-style: the stored value is (TP - address), so subtract.  */
6893 base = get_thread_pointer (true);
6894 dest = gen_reg_rtx (Pmode);
6895 emit_insn (gen_subsi3 (dest, base, off));
/* Local-exec: offset is a link-time constant (@NTPOFF/@TPOFF).  */
6899 case TLS_MODEL_LOCAL_EXEC:
6900 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6901 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6902 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6903 off = gen_rtx_CONST (Pmode, off);
6905 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6907 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6908 return gen_rtx_PLUS (Pmode, base, off);
6912 base = get_thread_pointer (true);
6913 dest = gen_reg_rtx (Pmode);
6914 emit_insn (gen_subsi3 (dest, base, off));
6925 /* Try machine-dependent ways of modifying an illegitimate address
6926 to be legitimate. If we find one, return the new, valid address.
6927 This macro is used in only one place: `memory_address' in explow.c.
6929 OLDX is the address as it was before break_out_memory_refs was called.
6930 In some cases it is useful to look at this to decide what needs to be done.
6932 MODE and WIN are passed so that this macro can use
6933 GO_IF_LEGITIMATE_ADDRESS.
6935 It is always safe for this macro to do nothing. It exists to recognize
6936 opportunities to optimize the output.
6938 For the 80386, we handle X+REG by loading X into a register R and
6939 using R+REG. R will go in a general reg and indexing will be used.
6940 However, if REG is a broken-out memory address or multiplication,
6941 nothing needs to be done because REG can certainly go in a general reg.
6943 When -fpic is used, special handling is needed for symbolic references.
6944 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): this excerpt is elided; declarations (log, changed),
   braces and several returns are not visible here.  */
6947 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6952 if (TARGET_DEBUG_ADDR)
6954 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6955 GET_MODE_NAME (mode));
/* TLS symbols (bare, or symbol+offset inside a CONST) go through the
   dedicated TLS legitimizer.  */
6959 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6961 return legitimize_tls_address (x, log, false);
6962 if (GET_CODE (x) == CONST
6963 && GET_CODE (XEXP (x, 0)) == PLUS
6964 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6965 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6967 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6968 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PIC symbolic references need GOT/GOTOFF rewriting.  */
6971 if (flag_pic && SYMBOLIC_CONST (x))
6972 return legitimize_pic_address (x, 0);
6974 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6975 if (GET_CODE (x) == ASHIFT
6976 && GET_CODE (XEXP (x, 1)) == CONST_INT
6977 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6980 log = INTVAL (XEXP (x, 1));
6981 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6982 GEN_INT (1 << log));
6985 if (GET_CODE (x) == PLUS)
6987 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6989 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6990 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6991 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6994 log = INTVAL (XEXP (XEXP (x, 0), 1));
6995 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6996 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6997 GEN_INT (1 << log));
7000 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7001 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7002 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7005 log = INTVAL (XEXP (XEXP (x, 1), 1));
7006 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7007 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7008 GEN_INT (1 << log));
7011 /* Put multiply first if it isn't already. */
7012 if (GET_CODE (XEXP (x, 1)) == MULT)
7014 rtx tmp = XEXP (x, 0);
7015 XEXP (x, 0) = XEXP (x, 1);
7020 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7021 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7022 created by virtual register instantiation, register elimination, and
7023 similar optimizations. */
7024 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7027 x = gen_rtx_PLUS (Pmode,
7028 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7029 XEXP (XEXP (x, 1), 0)),
7030 XEXP (XEXP (x, 1), 1));
7034 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7035 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7036 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7037 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7038 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7039 && CONSTANT_P (XEXP (x, 1)))
7042 rtx other = NULL_RTX;
/* Exactly one of the two candidate constants must be a CONST_INT;
   fold it into the displacement, leaving OTHER as the symbolic part.  */
7044 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7046 constant = XEXP (x, 1);
7047 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7049 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7051 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7052 other = XEXP (x, 1);
7060 x = gen_rtx_PLUS (Pmode,
7061 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7062 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7063 plus_constant (other, INTVAL (constant)));
/* After canonicalization, stop as soon as the address is legitimate.  */
7067 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force remaining MULT sub-expressions into registers.  */
7070 if (GET_CODE (XEXP (x, 0)) == MULT)
7073 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7076 if (GET_CODE (XEXP (x, 1)) == MULT)
7079 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7083 && GET_CODE (XEXP (x, 1)) == REG
7084 && GET_CODE (XEXP (x, 0)) == REG)
7087 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7090 x = legitimize_pic_address (x, 0);
7093 if (changed && legitimate_address_p (mode, x, FALSE))
/* reg + <something>: compute the non-register part into a temp.  */
7096 if (GET_CODE (XEXP (x, 0)) == REG)
7098 rtx temp = gen_reg_rtx (Pmode);
7099 rtx val = force_operand (XEXP (x, 1), temp);
7101 emit_move_insn (temp, val);
7107 else if (GET_CODE (XEXP (x, 1)) == REG)
7109 rtx temp = gen_reg_rtx (Pmode);
7110 rtx val = force_operand (XEXP (x, 0), temp);
7112 emit_move_insn (temp, val);
7122 /* Print an integer constant expression in assembler syntax. Addition
7123 and subtraction are the only arithmetic that may appear in these
7124 expressions. FILE is the stdio stream to write to, X is the rtx, and
7125 CODE is the operand print code from the output string. */
/* NOTE(review): elided excerpt — several case labels, breaks and the
   `buf' declaration are not visible.  */
7128 output_pic_addr_const (FILE *file, rtx x, int code)
7132 switch (GET_CODE (x))
7135 gcc_assert (flag_pic);
7140 output_addr_const (file, x);
/* 'P' requests a PLT call suffix for non-local symbols (non-Darwin).  */
7141 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7142 fputs ("@PLT", file);
7149 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7150 assemble_name (asm_out_file, buf);
7154 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7158 /* This used to output parentheses around the expression,
7159 but that does not work on the 386 (either ATT or BSD assembler). */
7160 output_pic_addr_const (file, XEXP (x, 0), code);
7164 if (GET_MODE (x) == VOIDmode)
7166 /* We can use %d if the number is <32 bits and positive. */
7167 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7168 fprintf (file, "0x%lx%08lx",
7169 (unsigned long) CONST_DOUBLE_HIGH (x),
7170 (unsigned long) CONST_DOUBLE_LOW (x));
7172 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7175 /* We can't handle floating point constants;
7176 PRINT_OPERAND must handle them. */
7177 output_operand_lossage ("floating constant misused");
7181 /* Some assemblers need integer constants to appear first. */
7182 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7184 output_pic_addr_const (file, XEXP (x, 0), code);
7186 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: only `sym - const_int' style differences are supported.  */
7190 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT)
7191 output_pic_addr_const (file, XEXP (x, 1), code);
7193 output_pic_addr_const (file, XEXP (x, 0), code);
7199 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7200 output_pic_addr_const (file, XEXP (x, 0), code);
7202 output_pic_addr_const (file, XEXP (x, 1), code);
7204 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by the relocation
   operator that corresponds to the unspec number.  */
7208 gcc_assert (XVECLEN (x, 0) == 1);
7209 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7210 switch (XINT (x, 1))
7213 fputs ("@GOT", file);
7216 fputs ("@GOTOFF", file);
7218 case UNSPEC_GOTPCREL:
7219 fputs ("@GOTPCREL(%rip)", file);
7221 case UNSPEC_GOTTPOFF:
7222 /* FIXME: This might be @TPOFF in Sun ld too. */
7223 fputs ("@GOTTPOFF", file);
7226 fputs ("@TPOFF", file);
7230 fputs ("@TPOFF", file);
7232 fputs ("@NTPOFF", file);
7235 fputs ("@DTPOFF", file);
7237 case UNSPEC_GOTNTPOFF:
7239 fputs ("@GOTTPOFF(%rip)", file);
7241 fputs ("@GOTNTPOFF", file);
7243 case UNSPEC_INDNTPOFF:
7244 fputs ("@INDNTPOFF", file);
7247 output_operand_lossage ("invalid UNSPEC as operand");
7253 output_operand_lossage ("invalid expression as operand");
7257 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7258 We need to emit DTP-relative relocations. */
/* Emit `.long sym@DTPOFF' (or the size-appropriate directive) so the
   debugger can compute a TLS variable's offset within its module.
   NOTE(review): the switch on SIZE is elided from this excerpt.  */
7261 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7263 fputs (ASM_LONG, file);
7264 output_addr_const (file, x);
7265 fputs ("@DTPOFF", file);
/* Pad the upper half to zero (presumably the 8-byte/32-bit case).  */
7271 fputs (", 0", file);
7278 /* In the name of slightly smaller debug output, and to cater to
7279 general assembler lossage, recognize PIC+GOTOFF and turn it back
7280 into a direct symbol reference.
7282 On Darwin, this is necessary to avoid a crash, because Darwin
7283 has a different PIC label for each routine but the DWARF debugging
7284 information is not associated with any particular routine, so it's
7285 necessary to remove references to the PIC label from RTL stored by
7286 the DWARF output code. */
/* NOTE(review): elided excerpt — the assignment `x = orig_x' and
   several early `return orig_x' paths are not visible.  */
7289 ix86_delegitimize_address (rtx orig_x)
7292 /* reg_addend is NULL or a multiple of some register. */
7293 rtx reg_addend = NULL_RTX;
7294 /* const_addend is NULL or a const_int. */
7295 rtx const_addend = NULL_RTX;
7296 /* This is the result, or NULL. */
7297 rtx result = NULL_RTX;
7299 if (GET_CODE (x) == MEM)
/* 64-bit case first: a mem of (const (unspec GOTPCREL)) is simply the
   symbol itself.  */
7304 if (GET_CODE (x) != CONST
7305 || GET_CODE (XEXP (x, 0)) != UNSPEC
7306 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7307 || GET_CODE (orig_x) != MEM)
7309 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit case: expect PIC_REG [+ index] + CONST(GOT/GOTOFF ...).  */
7312 if (GET_CODE (x) != PLUS
7313 || GET_CODE (XEXP (x, 1)) != CONST)
7316 if (GET_CODE (XEXP (x, 0)) == REG
7317 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7318 /* %ebx + GOT/GOTOFF */
7320 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7322 /* %ebx + %reg * scale + GOT/GOTOFF */
7323 reg_addend = XEXP (x, 0);
7324 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7325 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7326 reg_addend = XEXP (reg_addend, 1);
7327 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7328 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7329 reg_addend = XEXP (reg_addend, 0);
7332 if (GET_CODE (reg_addend) != REG
7333 && GET_CODE (reg_addend) != MULT
7334 && GET_CODE (reg_addend) != ASHIFT)
/* Strip the CONST wrapper and peel off any constant offset.  */
7340 x = XEXP (XEXP (x, 1), 0);
7341 if (GET_CODE (x) == PLUS
7342 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7344 const_addend = XEXP (x, 1);
7348 if (GET_CODE (x) == UNSPEC
7349 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7350 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7351 result = XVECEXP (x, 0, 0);
7353 if (TARGET_MACHO && darwin_local_data_pic (x)
7354 && GET_CODE (orig_x) != MEM)
7355 result = XEXP (x, 0);
/* Re-attach the stripped constant and register addends.  */
7361 result = gen_rtx_PLUS (Pmode, result, const_addend);
7363 result = gen_rtx_PLUS (Pmode, reg_addend, result);
/* Print the condition-code suffix (e.g. "e", "a", "np") for CODE under
   flags mode MODE to FILE; REVERSE inverts the condition.
   NOTE(review): elided excerpt — the `suffix' declaration, the `fp'
   parameter and most switch cases are not visible here.  */
7368 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7373 if (mode == CCFPmode || mode == CCFPUmode)
/* FP compares must be reducible to a single integer condition.  */
7375 enum rtx_code second_code, bypass_code;
7376 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7377 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7378 code = ix86_fp_compare_code_to_integer (code);
7382 code = reverse_condition (code);
7393 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7397 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7398 Those same assemblers have the same but opposite lossage on cmov. */
7399 gcc_assert (mode == CCmode);
7400 suffix = fp ? "nbe" : "a";
7420 gcc_assert (mode == CCmode);
7442 gcc_assert (mode == CCmode);
7443 suffix = fp ? "nb" : "ae";
7446 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7450 gcc_assert (mode == CCmode);
7454 suffix = fp ? "u" : "p";
7457 suffix = fp ? "nu" : "np";
7462 fputs (suffix, file);
7465 /* Print the name of register X to FILE based on its machine mode and number.
7466 If CODE is 'w', pretend the mode is HImode.
7467 If CODE is 'b', pretend the mode is QImode.
7468 If CODE is 'k', pretend the mode is SImode.
7469 If CODE is 'q', pretend the mode is DImode.
7470 If CODE is 'h', pretend the reg is the 'high' byte register.
7471 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
/* NOTE(review): elided excerpt — the size assignments for each code
   letter and several putc/fprintf calls are not visible.  */
7474 print_reg (rtx x, int code, FILE *file)
/* Pseudo frame/arg-pointer and condition registers never reach
   assembly output.  */
7476 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7477 && REGNO (x) != FRAME_POINTER_REGNUM
7478 && REGNO (x) != FLAGS_REG
7479 && REGNO (x) != FPSR_REG);
7481 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Translate the override letter into an operand size in bytes.  */
7484 if (code == 'w' || MMX_REG_P (x))
7486 else if (code == 'b')
7488 else if (code == 'k')
7490 else if (code == 'q')
7492 else if (code == 'y')
7494 else if (code == 'h')
7497 code = GET_MODE_SIZE (GET_MODE (x));
7499 /* Irritatingly, AMD extended registers use different naming convention
7500 from the normal registers. */
7501 if (REX_INT_REG_P (x))
7503 gcc_assert (TARGET_64BIT);
7507 error ("extended registers have no high halves");
7510 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7513 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7516 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7519 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7522 error ("unsupported operand size for extended register");
7530 if (STACK_TOP_P (x))
7532 fputs ("st(0)", file);
/* Integer regs get an 'e' (32-bit) or 'r' (64-bit) prefix letter.  */
7539 if (! ANY_FP_REG_P (x))
7540 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7545 fputs (hi_reg_name[REGNO (x)], file);
7548 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7550 fputs (qi_reg_name[REGNO (x)], file);
7553 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7555 fputs (qi_high_reg_name[REGNO (x)], file);
7562 /* Locate some local-dynamic symbol still in use by this function
7563 so that we can print its name in some tls_local_dynamic_base
/* Cache the answer in cfun->machine->some_ld_name; scan the insn
   stream only on the first request per function.  */
7567 get_some_local_dynamic_name (void)
7571 if (cfun->machine->some_ld_name)
7572 return cfun->machine->some_ld_name;
7574 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
/* The for_each_rtx callback stores the name as a side effect.  */
7576 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7577 return cfun->machine->some_ld_name;
/* for_each_rtx callback: record the first SYMBOL_REF with local-dynamic
   TLS model into cfun->machine->some_ld_name (return value elided).  */
7583 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7587 if (GET_CODE (x) == SYMBOL_REF
7588 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7590 cfun->machine->some_ld_name = XSTR (x, 0);
/* Operand-printing dispatch for the i386 back end; CODE is the letter
   following '%' in an output template.  */
7598 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7599 C -- print opcode suffix for set/cmov insn.
7600 c -- like C, but print reversed condition
7601 F,f -- likewise, but for floating-point.
7602 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7604 R -- print the prefix for register names.
7605 z -- print the opcode suffix for the size of the current operand.
7606 * -- print a star (in certain assembler syntax)
7607 A -- print an absolute memory reference.
7608 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7609 s -- print a shift double count, followed by the assemblers argument
7611 b -- print the QImode name of the register for the indicated operand.
7612 %b0 would print %al if operands[0] is reg 0.
7613 w -- likewise, print the HImode name of the register.
7614 k -- likewise, print the SImode name of the register.
7615 q -- likewise, print the DImode name of the register.
7616 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7617 y -- print "st(0)" instead of "st" as a register.
7618 D -- print condition for SSE cmp instruction.
7619 P -- if PIC, print an @PLT suffix.
7620 X -- don't print any sort of PIC '@' suffix for a symbol.
7621 & -- print some in-use local-dynamic symbol name.
7622 H -- print a memory address offset by 8; used for sse high-parts
/* NOTE(review): heavily elided — the switch labels, breaks and many
   putc calls between the visible lines are missing from this excerpt.  */
7626 print_operand (FILE *file, rtx x, int code)
7633 if (ASSEMBLER_DIALECT == ASM_ATT)
7638 assemble_name (file, get_some_local_dynamic_name ());
7642 switch (ASSEMBLER_DIALECT)
7649 /* Intel syntax. For absolute addresses, registers should not
7650 be surrounded by braces. */
7651 if (GET_CODE (x) != REG)
7654 PRINT_OPERAND (file, x, 0);
7664 PRINT_OPERAND (file, x, 0);
/* Size-suffix letters: AT&T syntax emits an explicit suffix char.  */
7669 if (ASSEMBLER_DIALECT == ASM_ATT)
7674 if (ASSEMBLER_DIALECT == ASM_ATT)
7679 if (ASSEMBLER_DIALECT == ASM_ATT)
7684 if (ASSEMBLER_DIALECT == ASM_ATT)
7689 if (ASSEMBLER_DIALECT == ASM_ATT)
7694 if (ASSEMBLER_DIALECT == ASM_ATT)
7699 /* 387 opcodes don't get size suffixes if the operands are
7701 if (STACK_REG_P (x))
7704 /* Likewise if using Intel opcodes. */
7705 if (ASSEMBLER_DIALECT == ASM_INTEL)
7708 /* This is the size of op from size of operand. */
7709 switch (GET_MODE_SIZE (GET_MODE (x)))
7712 #ifdef HAVE_GAS_FILDS_FISTS
7718 if (GET_MODE (x) == SFmode)
7733 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7735 #ifdef GAS_MNEMONICS
/* 's': shift-double count; omitted entirely on some assemblers.  */
7761 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7763 PRINT_OPERAND (file, x, 0);
7769 /* Little bit of braindamage here. The SSE compare instructions
7770 does use completely different names for the comparisons that the
7771 fp conditional moves. */
7772 switch (GET_CODE (x))
7787 fputs ("unord", file);
7791 fputs ("neq", file);
7795 fputs ("nlt", file);
7799 fputs ("nle", file);
7802 fputs ("ord", file);
/* 'O': Sun assembler wants an explicit size letter and dot on cmov.  */
7809 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7810 if (ASSEMBLER_DIALECT == ASM_ATT)
7812 switch (GET_MODE (x))
7814 case HImode: putc ('w', file); break;
7816 case SFmode: putc ('l', file); break;
7818 case DFmode: putc ('q', file); break;
7819 default: gcc_unreachable ();
/* 'C'/'F': condition suffix, normal ('C') and floating ('F').  */
7826 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7829 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7830 if (ASSEMBLER_DIALECT == ASM_ATT)
7833 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7836 /* Like above, but reverse condition */
7838 /* Check to see if argument to %c is really a constant
7839 and not a condition code which needs to be reversed. */
7840 if (!COMPARISON_P (x))
7842 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7845 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7848 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7849 if (ASSEMBLER_DIALECT == ASM_ATT)
7852 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'H': address of the high 8 bytes of a 16-byte operand.  */
7856 /* It doesn't actually matter what mode we use here, as we're
7857 only going to use this for printing. */
7858 x = adjust_address_nv (x, DImode, 8);
/* '+': emit ds/cs branch-hint prefixes from REG_BR_PROB notes when
   the static prediction disagrees with the CPU's default.  */
7865 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7868 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7871 int pred_val = INTVAL (XEXP (x, 0));
7873 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7874 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7876 int taken = pred_val > REG_BR_PROB_BASE / 2;
7877 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7879 /* Emit hints only in the case default branch prediction
7880 heuristics would fail. */
7881 if (taken != cputaken)
7883 /* We use 3e (DS) prefix for taken branches and
7884 2e (CS) prefix for not taken branches. */
7886 fputs ("ds ; ", file);
7888 fputs ("cs ; ", file);
7895 output_operand_lossage ("invalid operand code '%c'", code);
/* After the code-letter switch: print the operand itself.  */
7899 if (GET_CODE (x) == REG)
7900 print_reg (x, code, file);
7902 else if (GET_CODE (x) == MEM)
7904 /* No `byte ptr' prefix for call instructions. */
7905 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7908 switch (GET_MODE_SIZE (GET_MODE (x)))
7910 case 1: size = "BYTE"; break;
7911 case 2: size = "WORD"; break;
7912 case 4: size = "DWORD"; break;
7913 case 8: size = "QWORD"; break;
7914 case 12: size = "XWORD"; break;
7915 case 16: size = "XMMWORD"; break;
7920 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7923 else if (code == 'w')
7925 else if (code == 'k')
7929 fputs (" PTR ", file);
7933 /* Avoid (%rip) for call operands. */
7934 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7935 && GET_CODE (x) != CONST_INT)
7936 output_addr_const (file, x);
7937 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7938 output_operand_lossage ("invalid constraints for operand");
/* SFmode immediates are emitted as their 32-bit bit pattern.  */
7943 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7948 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7949 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7951 if (ASSEMBLER_DIALECT == ASM_ATT)
7953 fprintf (file, "0x%08lx", l);
7956 /* These float cases don't actually occur as immediate operands. */
7957 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7961 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7962 fprintf (file, "%s", dstr);
7965 else if (GET_CODE (x) == CONST_DOUBLE
7966 && GET_MODE (x) == XFmode)
7970 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7971 fprintf (file, "%s", dstr);
7976 /* We have patterns that allow zero sets of memory, for instance.
7977 In 64-bit mode, we should probably support all 8-byte vectors,
7978 since we can in fact encode that into an immediate. */
7979 if (GET_CODE (x) == CONST_VECTOR)
7981 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediate-marker prefixes ('$' in AT&T, "OFFSET FLAT:" in Intel).  */
7987 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7989 if (ASSEMBLER_DIALECT == ASM_ATT)
7992 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7993 || GET_CODE (x) == LABEL_REF)
7995 if (ASSEMBLER_DIALECT == ASM_ATT)
7998 fputs ("OFFSET FLAT:", file);
8001 if (GET_CODE (x) == CONST_INT)
8002 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8004 output_pic_addr_const (file, x, code);
8006 output_addr_const (file, x);
8010 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): elided excerpt — the `ok' assert, seg handling header
   and several putc calls for '(' ')' ',' are not visible.  */
8013 print_operand_address (FILE *file, rtx addr)
8015 struct ix86_address parts;
8016 rtx base, index, disp;
/* Decompose ADDR into base/index/scale/displacement/segment.  */
8018 int ok = ix86_decompose_address (addr, &parts);
8023 index = parts.index;
8025 scale = parts.scale;
/* Non-default segment (%fs/%gs) gets an explicit override prefix.  */
8033 if (USER_LABEL_PREFIX[0] == 0)
8035 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8041 if (!base && !index)
8043 /* Displacement only requires special attention. */
8045 if (GET_CODE (disp) == CONST_INT)
8047 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8049 if (USER_LABEL_PREFIX[0] == 0)
8051 fputs ("ds:", file);
8053 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8056 output_pic_addr_const (file, disp, 0);
8058 output_addr_const (file, disp);
8060 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8063 if (GET_CODE (disp) == CONST
8064 && GET_CODE (XEXP (disp, 0)) == PLUS
8065 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8066 disp = XEXP (XEXP (disp, 0), 0);
8067 if (GET_CODE (disp) == LABEL_REF
8068 || (GET_CODE (disp) == SYMBOL_REF
8069 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8070 fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
8075 if (ASSEMBLER_DIALECT == ASM_ATT)
8080 output_pic_addr_const (file, disp, 0);
8081 else if (GET_CODE (disp) == LABEL_REF)
8082 output_asm_label (disp);
8084 output_addr_const (file, disp);
8089 print_reg (base, 0, file);
8093 print_reg (index, 0, file);
8095 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp], symbol printed first.  */
8101 rtx offset = NULL_RTX;
8105 /* Pull out the offset of a symbol; print any symbol itself. */
8106 if (GET_CODE (disp) == CONST
8107 && GET_CODE (XEXP (disp, 0)) == PLUS
8108 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8110 offset = XEXP (XEXP (disp, 0), 1);
8111 disp = gen_rtx_CONST (VOIDmode,
8112 XEXP (XEXP (disp, 0), 0));
8116 output_pic_addr_const (file, disp, 0);
8117 else if (GET_CODE (disp) == LABEL_REF)
8118 output_asm_label (disp);
8119 else if (GET_CODE (disp) == CONST_INT)
8122 output_addr_const (file, disp);
8128 print_reg (base, 0, file);
8131 if (INTVAL (offset) >= 0)
8133 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8137 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8144 print_reg (index, 0, file);
8146 fprintf (file, "*%d", scale);
/* Target hook: print TLS-relocation UNSPEC wrappers that the generic
   output_addr_const cannot handle; returns false for anything else.
   NOTE(review): elided excerpt — case labels for TPOFF/NTPOFF/DTPOFF
   and the return statements are not all visible.  */
8154 output_addr_const_extra (FILE *file, rtx x)
8158 if (GET_CODE (x) != UNSPEC)
8161 op = XVECEXP (x, 0, 0);
8162 switch (XINT (x, 1))
8164 case UNSPEC_GOTTPOFF:
8165 output_addr_const (file, op);
8166 /* FIXME: This might be @TPOFF in Sun ld. */
8167 fputs ("@GOTTPOFF", file);
8170 output_addr_const (file, op);
8171 fputs ("@TPOFF", file);
8174 output_addr_const (file, op);
8176 fputs ("@TPOFF", file);
8178 fputs ("@NTPOFF", file);
8181 output_addr_const (file, op);
8182 fputs ("@DTPOFF", file);
8184 case UNSPEC_GOTNTPOFF:
8185 output_addr_const (file, op);
/* 64-bit uses a RIP-relative GOTTPOFF; 32-bit uses GOTNTPOFF.  */
8187 fputs ("@GOTTPOFF(%rip)", file);
8189 fputs ("@GOTNTPOFF", file);
8191 case UNSPEC_INDNTPOFF:
8192 output_addr_const (file, op);
8193 fputs ("@INDNTPOFF", file);
8203 /* Split one or more DImode RTL references into pairs of SImode
8204 references. The RTL can be REG, offsettable MEM, integer constant, or
8205 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8206 split and "num" is its length. lo_half and hi_half are output arrays
8207 that parallel "operands". */
/* NOTE(review): the loop header iterating NUM down is elided here.  */
8210 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8214 rtx op = operands[num];
8216 /* simplify_subreg refuse to split volatile memory addresses,
8217 but we still have to handle it. */
8218 if (GET_CODE (op) == MEM)
8220 lo_half[num] = adjust_address (op, SImode, 0);
8221 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: take SImode subregs at byte offsets 0 and 4; VOIDmode
   constants are treated as DImode.  */
8225 lo_half[num] = simplify_gen_subreg (SImode, op,
8226 GET_MODE (op) == VOIDmode
8227 ? DImode : GET_MODE (op), 0);
8228 hi_half[num] = simplify_gen_subreg (SImode, op,
8229 GET_MODE (op) == VOIDmode
8230 ? DImode : GET_MODE (op), 4);
8234 /* Split one or more TImode RTL references into pairs of DImode
8235 references. The RTL can be REG, offsettable MEM, integer constant, or
8236 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8237 split and "num" is its length. lo_half and hi_half are output arrays
8238 that parallel "operands". */
/* TImode analogue of split_di: halves are DImode at offsets 0 and 8.
   NOTE(review): the loop header iterating NUM down is elided here.  */
8241 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8245 rtx op = operands[num];
8247 /* simplify_subreg refuse to split volatile memory addresses, but we
8248 still have to handle it. */
8249 if (GET_CODE (op) == MEM)
8251 lo_half[num] = adjust_address (op, DImode, 0);
8252 hi_half[num] = adjust_address (op, DImode, 8);
8256 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8257 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8262 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8263 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8264 is the expression of the binary operation. The output may either be
8265 emitted here, or returned to the caller, like all output_* functions.
8267 There is no guarantee that the operands are the same mode, as they
8268 might be within FLOAT or FLOAT_EXTEND expressions. */
8270 #ifndef SYSV386_COMPAT
8271 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8272 wants to fix the assemblers because that causes incompatibility
8273 with gcc. No-one wants to fix gcc because that causes
8274 incompatibility with assemblers... You can use the option of
8275 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8276 #define SYSV386_COMPAT 1
/* NOTE(review): heavily elided — the `p' declaration, strcpy of the
   mnemonic into BUF, many case labels and the final strcat/return are
   not visible in this excerpt.  */
8280 output_387_binary_op (rtx insn, rtx *operands)
8282 static char buf[30];
8285 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8287 #ifdef ENABLE_CHECKING
8288 /* Even if we do not want to check the inputs, this documents input
8289 constraints. Which helps in understanding the following code. */
8290 if (STACK_REG_P (operands[0])
8291 && ((REG_P (operands[1])
8292 && REGNO (operands[0]) == REGNO (operands[1])
8293 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8294 || (REG_P (operands[2])
8295 && REGNO (operands[0]) == REGNO (operands[2])
8296 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8297 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8300 gcc_assert (is_sse);
/* Select the base mnemonic; integer-mode operands pick the fi* forms
   (fiadd/fisub/fimul/fidiv).  */
8303 switch (GET_CODE (operands[3]))
8306 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8307 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8315 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8316 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8324 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8325 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8333 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8334 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: scalar single/double suffix and a two-operand template.  */
8348 if (GET_MODE (operands[0]) == SFmode)
8349 strcat (buf, "ss\t{%2, %0|%0, %2}");
8351 strcat (buf, "sd\t{%2, %0|%0, %2}");
8356 switch (GET_CODE (operands[3]))
/* Commutative ops (PLUS/MULT): canonicalize so op0 == op1.  */
8360 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8362 rtx temp = operands[2];
8363 operands[2] = operands[1];
8367 /* know operands[0] == operands[1]. */
8369 if (GET_CODE (operands[2]) == MEM)
8375 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8377 if (STACK_TOP_P (operands[0]))
8378 /* How is it that we are storing to a dead operand[2]?
8379 Well, presumably operands[1] is dead too. We can't
8380 store the result to st(0) as st(0) gets popped on this
8381 instruction. Instead store to operands[2] (which I
8382 think has to be st(1)). st(1) will be popped later.
8383 gcc <= 2.8.1 didn't have this check and generated
8384 assembly code that the Unixware assembler rejected. */
8385 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8387 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8391 if (STACK_TOP_P (operands[0]))
8392 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8394 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters, and the
   reversed forms (fsubr/fdivr) come into play.  */
8399 if (GET_CODE (operands[1]) == MEM)
8405 if (GET_CODE (operands[2]) == MEM)
8411 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8414 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8415 derived assemblers, confusingly reverse the direction of
8416 the operation for fsub{r} and fdiv{r} when the
8417 destination register is not st(0). The Intel assembler
8418 doesn't have this brain damage. Read !SYSV386_COMPAT to
8419 figure out what the hardware really does. */
8420 if (STACK_TOP_P (operands[0]))
8421 p = "{p\t%0, %2|rp\t%2, %0}";
8423 p = "{rp\t%2, %0|p\t%0, %2}";
8425 if (STACK_TOP_P (operands[0]))
8426 /* As above for fmul/fadd, we can't store to st(0). */
8427 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8429 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8434 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8437 if (STACK_TOP_P (operands[0]))
8438 p = "{rp\t%0, %1|p\t%1, %0}";
8440 p = "{p\t%1, %0|rp\t%0, %1}";
8442 if (STACK_TOP_P (operands[0]))
8443 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8445 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8450 if (STACK_TOP_P (operands[0]))
8452 if (STACK_TOP_P (operands[1]))
8453 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8455 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8458 else if (STACK_TOP_P (operands[1]))
8461 p = "{\t%1, %0|r\t%0, %1}";
8463 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8469 p = "{r\t%2, %0|\t%0, %2}";
8471 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8484 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Classifies INSN's x87 control-word requirement: calls/asm force a
   reload (UNINITIALIZED); otherwise the insn's i387_cw attribute is
   consulted.  NOTE(review): extract is elided — the CALL_P test and the
   per-mode TARGET_80387/TARGET_SSE_MATH guards are not visible here. */
8487 ix86_mode_needed (int entity, rtx insn)
8489 enum attr_i387_cw mode;
8491 /* The mode UNINITIALIZED is used to store control word after a
8492 function call or ASM pattern. The mode ANY specify that function
8493 has no requirements on the control word and make no changes in the
8494 bits we are interested in. */
8497 || (NONJUMP_INSN_P (insn)
8498 && (asm_noperands (PATTERN (insn)) >= 0
8499 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8500 return I387_CW_UNINITIALIZED;
8502 if (recog_memoized (insn) < 0)
8505 mode = get_attr_i387_cw (insn);
8510 if (mode == I387_CW_TRUNC)
8515 if (mode == I387_CW_FLOOR)
8520 if (mode == I387_CW_CEIL)
8525 if (mode == I387_CW_MASK_PM)
8536 /* Output code to initialize control word copies used by trunc?f?i and
8537 rounding patterns. CURRENT_MODE is set to current control word,
8538 while NEW_MODE is set to new control word. */
/* Stores the live x87 control word (fnstcw), edits the rounding-control
   field — 0x0c00 masks CW bits 10-11 (RC) — and saves the edited copy to
   a per-slot stack local for a later fldcw.  The second group of cases
   uses movsi_insv_1 to insert the 2-bit RC value directly, avoiding the
   and/or pair on targets where that is cheaper (see the TARGET_64BIT /
   TARGET_PARTIAL_REG_STALL / optimize_size test above it). */
8541 emit_i387_cw_initialization (int mode)
8543 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8548 rtx reg = gen_reg_rtx (HImode);
8550 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8551 emit_move_insn (reg, stored_mode);
8553 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8558 /* round toward zero (truncate) */
8559 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8560 slot = SLOT_CW_TRUNC;
8564 /* round down toward -oo */
8565 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8566 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8567 slot = SLOT_CW_FLOOR;
8571 /* round up toward +oo */
8572 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8573 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8574 slot = SLOT_CW_CEIL;
8577 case I387_CW_MASK_PM:
8578 /* mask precision exception for nearbyint() */
8579 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8580 slot = SLOT_CW_MASK_PM;
8592 /* round toward zero (truncate) */
8593 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8594 slot = SLOT_CW_TRUNC;
8598 /* round down toward -oo */
8599 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8600 slot = SLOT_CW_FLOOR;
8604 /* round up toward +oo */
8605 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8606 slot = SLOT_CW_CEIL;
8609 case I387_CW_MASK_PM:
8610 /* mask precision exception for nearbyint() */
8611 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8612 slot = SLOT_CW_MASK_PM;
8620 gcc_assert (slot < MAX_386_STACK_LOCALS);
8622 new_mode = assign_386_stack_local (HImode, slot);
8623 emit_move_insn (new_mode, reg);
8626 /* Output code for INSN to convert a float to a signed int. OPERANDS
8627 are the insn operands. The output may be [HSD]Imode and the input
8628 operand may be [SDX]Fmode. */
/* FISTTP (SSE3) truncates without the control-word dance; otherwise the
   CW is swapped in via fldcw %3 and restored via fldcw %2 around the
   fist/fistp.  The popping form is forced for DImode since no
   non-popping 64-bit store exists. */
8631 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8633 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8634 int dimode_p = GET_MODE (operands[0]) == DImode;
8635 int round_mode = get_attr_i387_cw (insn);
8637 /* Jump through a hoop or two for DImode, since the hardware has no
8638 non-popping instruction. We used to do this a different way, but
8639 that was somewhat fragile and broke with post-reload splitters. */
8640 if ((dimode_p || fisttp) && !stack_top_dies)
8641 output_asm_insn ("fld\t%y1", operands);
8643 gcc_assert (STACK_TOP_P (operands[1]));
8644 gcc_assert (GET_CODE (operands[0]) == MEM);
8647 output_asm_insn ("fisttp%z0\t%0", operands);
8650 if (round_mode != I387_CW_ANY)
8651 output_asm_insn ("fldcw\t%3", operands);
8652 if (stack_top_dies || dimode_p)
8653 output_asm_insn ("fistp%z0\t%0", operands);
8655 output_asm_insn ("fist%z0\t%0", operands);
8656 if (round_mode != I387_CW_ANY)
8657 output_asm_insn ("fldcw\t%2", operands);
8663 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8664 have the values zero or one, indicates the ffreep insn's operand
8665 from the OPERANDS array. */
/* When the assembler lacks ffreep support, the raw opcode is emitted as
   a .word: 0xdf 0xc0+i is ffreep %st(i) (bytes appear swapped below
   because .word is little-endian).  Fallback when !TARGET_USE_FFREEP is
   a plain fstp, which also pops the stack. */
8668 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8670 if (TARGET_USE_FFREEP)
8671 #if HAVE_AS_IX86_FFREEP
8672 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8674 switch (REGNO (operands[opno]))
8676 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8677 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8678 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8679 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8680 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8681 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8682 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8683 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8687 return opno ? "fstp\t%y1" : "fstp\t%y0";
8691 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8692 should be used. UNORDERED_P is true when fucom should be used. */
/* Three strategies: SSE comis/ucomis for xmm operands; ftst for compare
   against +0.0; fcompp/fcomip-family otherwise.  The final template is
   picked from alt[] by a 4-bit key built from eflags_p / integer-operand
   / unordered_p / stack_top_dies. */
8695 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8698 rtx cmp_op0, cmp_op1;
8699 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8703 cmp_op0 = operands[0];
8704 cmp_op1 = operands[1];
8708 cmp_op0 = operands[1];
8709 cmp_op1 = operands[2];
8714 if (GET_MODE (operands[0]) == SFmode)
8716 return "ucomiss\t{%1, %0|%0, %1}";
8718 return "comiss\t{%1, %0|%0, %1}";
8721 return "ucomisd\t{%1, %0|%0, %1}";
8723 return "comisd\t{%1, %0|%0, %1}";
8726 gcc_assert (STACK_TOP_P (cmp_op0));
8728 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8730 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8734 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8735 return output_387_ffreep (operands, 1);
8738 return "ftst\n\tfnstsw\t%0";
8741 if (STACK_REG_P (cmp_op1)
8743 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8744 && REGNO (cmp_op1) != FIRST_STACK_REG)
8746 /* If both the top of the 387 stack dies, and the other operand
8747 is also a stack register that dies, then this must be a
8748 `fcompp' float compare */
8752 /* There is no double popping fcomi variant. Fortunately,
8753 eflags is immune from the fstp's cc clobbering. */
8755 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8757 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8758 return output_387_ffreep (operands, 0);
8763 return "fucompp\n\tfnstsw\t%0";
8765 return "fcompp\n\tfnstsw\t%0";
8770 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8772 static const char * const alt[16] =
8774 "fcom%z2\t%y2\n\tfnstsw\t%0",
8775 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8776 "fucom%z2\t%y2\n\tfnstsw\t%0",
8777 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8779 "ficom%z2\t%y2\n\tfnstsw\t%0",
8780 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8784 "fcomi\t{%y1, %0|%0, %y1}",
8785 "fcomip\t{%y1, %0|%0, %y1}",
8786 "fucomi\t{%y1, %0|%0, %y1}",
8787 "fucomip\t{%y1, %0|%0, %y1}",
8798 mask = eflags_p << 3;
8799 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8800 mask |= unordered_p << 1;
8801 mask |= stack_top_dies;
8803 gcc_assert (mask < 16);
/* Emit one element of a jump-table address vector: a .long (or .quad on
   64-bit — NOTE(review): the condition selecting ASM_QUAD is elided
   here) referencing local label LPREFIX<value>. */
8812 ix86_output_addr_vec_elt (FILE *file, int value)
8814 const char *directive = ASM_LONG;
8818 directive = ASM_QUAD;
8820 gcc_assert (!TARGET_64BIT);
8823 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC address-difference jump table: either a
   label difference, a @GOTOFF reference, a Mach-O function-base
   difference, or (last resort) an expression relative to the GOT
   symbol. */
8827 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8830 fprintf (file, "%s%s%d-%s%d\n",
8831 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8832 else if (HAVE_AS_GOTOFF_IN_DATA)
8833 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8835 else if (TARGET_MACHO)
8837 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8838 machopic_output_function_base_name (file);
8839 fprintf(file, "\n");
8843 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8844 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8847 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Zeroing DEST post-reload: widen sub-SImode registers to SImode to
   dodge the 66h prefix, and attach a flags clobber when the xor form
   will be used (xor clobbers EFLAGS; mov does not). */
8851 ix86_expand_clear (rtx dest)
8855 /* We play register width games, which are only valid after reload. */
8856 gcc_assert (reload_completed);
8858 /* Avoid HImode and its attendant prefix byte. */
8859 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8860 dest = gen_rtx_REG (SImode, REGNO (dest));
8862 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8864 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8865 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8867 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8868 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8874 /* X is an unchanging MEM. If it is a constant pool reference, return
8875 the constant pool rtx, else NULL. */
/* The address is de-legitimized first so PIC-decorated pool references
   are recognized too. */
8878 maybe_get_pool_constant (rtx x)
8880 x = ix86_delegitimize_address (XEXP (x, 0));
8882 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8883 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS and PIC symbol references, keeping mem->mem moves
   apart, and spilling awkward constants to registers or the constant
   pool as needed before emitting the final SET. */
8889 ix86_expand_move (enum machine_mode mode, rtx operands[])
8891 int strict = (reload_in_progress || reload_completed);
8893 enum tls_model model;
8898 if (GET_CODE (op1) == SYMBOL_REF)
8900 model = SYMBOL_REF_TLS_MODEL (op1);
8903 op1 = legitimize_tls_address (op1, model, true);
8904 op1 = force_operand (op1, op0);
8909 else if (GET_CODE (op1) == CONST
8910 && GET_CODE (XEXP (op1, 0)) == PLUS
8911 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
/* TLS symbol plus constant offset: legitimize the symbol, then re-add
   the offset. */
8913 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8916 rtx addend = XEXP (XEXP (op1, 0), 1);
8917 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8918 op1 = force_operand (op1, NULL);
8919 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8920 op0, 1, OPTAB_DIRECT);
8926 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8928 if (TARGET_MACHO && !TARGET_64BIT)
8933 rtx temp = ((reload_in_progress
8934 || ((op0 && GET_CODE (op0) == REG)
8936 ? op0 : gen_reg_rtx (Pmode));
8937 op1 = machopic_indirect_data_reference (op1, temp);
8938 op1 = machopic_legitimize_pic_address (op1, mode,
8939 temp == op1 ? 0 : temp);
8941 else if (MACHOPIC_INDIRECT)
8942 op1 = machopic_indirect_data_reference (op1, 0);
8949 if (GET_CODE (op0) == MEM)
8950 op1 = force_reg (Pmode, op1);
8952 op1 = legitimize_address (op1, op1, Pmode);
/* No mem->mem moves: the machine has none, so go through a register. */
8957 if (GET_CODE (op0) == MEM
8958 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8959 || !push_operand (op0, mode))
8960 && GET_CODE (op1) == MEM)
8961 op1 = force_reg (mode, op1);
8963 if (push_operand (op0, mode)
8964 && ! general_no_elim_operand (op1, mode))
8965 op1 = copy_to_mode_reg (mode, op1);
8967 /* Force large constants in 64bit compilation into register
8968 to get them CSEed. */
8969 if (TARGET_64BIT && mode == DImode
8970 && immediate_operand (op1, mode)
8971 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8972 && !register_operand (op0, mode)
8973 && optimize && !reload_completed && !reload_in_progress)
8974 op1 = copy_to_mode_reg (mode, op1);
8976 if (FLOAT_MODE_P (mode))
8978 /* If we are loading a floating point constant to a register,
8979 force the value to memory now, since we'll get better code
8980 out the back end. */
8984 else if (GET_CODE (op1) == CONST_DOUBLE)
8986 op1 = validize_mem (force_const_mem (mode, op1));
8987 if (!register_operand (op0, mode))
8989 rtx temp = gen_reg_rtx (mode);
8990 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8991 emit_move_insn (op0, temp);
8998 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, pushing non-trivial constants to the
   constant pool and breaking mem->mem moves through a register. */
9002 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9004 rtx op0 = operands[0], op1 = operands[1];
9006 /* Force constants other than zero into memory. We do not know how
9007 the instructions used to build constants modify the upper 64 bits
9008 of the register, once we have that information we may be able
9009 to handle some of them more efficiently. */
9010 if ((reload_in_progress | reload_completed) == 0
9011 && register_operand (op0, mode)
9013 && standard_sse_constant_p (op1) <= 0)
9014 op1 = validize_mem (force_const_mem (mode, op1));
9016 /* Make operand1 a register if it isn't already. */
9018 && !register_operand (op0, mode)
9019 && !register_operand (op1, mode))
9021 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9025 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9028 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9029 straight to ix86_expand_vector_move. */
/* Unaligned loads/stores: movups when optimizing for size, movdqu for
   integer vectors on SSE2, otherwise split into low/high halves
   (loadlpd/loadhpd, loadlps/loadhps and the store counterparts) to
   avoid the slow full unaligned access on the affected chips. */
9032 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9041 /* If we're optimizing for size, movups is the smallest. */
9044 op0 = gen_lowpart (V4SFmode, op0);
9045 op1 = gen_lowpart (V4SFmode, op1);
9046 emit_insn (gen_sse_movups (op0, op1));
9050 /* ??? If we have typed data, then it would appear that using
9051 movdqu is the only way to get unaligned data loaded with
9053 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9055 op0 = gen_lowpart (V16QImode, op0);
9056 op1 = gen_lowpart (V16QImode, op1);
9057 emit_insn (gen_sse2_movdqu (op0, op1));
9061 if (TARGET_SSE2 && mode == V2DFmode)
9065 /* When SSE registers are split into halves, we can avoid
9066 writing to the top half twice. */
9067 if (TARGET_SSE_SPLIT_REGS)
9069 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9074 /* ??? Not sure about the best option for the Intel chips.
9075 The following would seem to satisfy; the register is
9076 entirely cleared, breaking the dependency chain. We
9077 then store to the upper half, with a dependency depth
9078 of one. A rumor has it that Intel recommends two movsd
9079 followed by an unpacklpd, but this is unconfirmed. And
9080 given that the dependency depth of the unpacklpd would
9081 still be one, I'm not sure why this would be better. */
9082 zero = CONST0_RTX (V2DFmode);
9085 m = adjust_address (op1, DFmode, 0);
9086 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9087 m = adjust_address (op1, DFmode, 8);
9088 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9092 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9093 emit_move_insn (op0, CONST0_RTX (mode));
9095 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9097 if (mode != V4SFmode)
9098 op0 = gen_lowpart (V4SFmode, op0);
9099 m = adjust_address (op1, V2SFmode, 0);
9100 emit_insn (gen_sse_loadlps (op0, op0, m));
9101 m = adjust_address (op1, V2SFmode, 8);
9102 emit_insn (gen_sse_loadhps (op0, op0, m));
9105 else if (MEM_P (op0))
9107 /* If we're optimizing for size, movups is the smallest. */
9110 op0 = gen_lowpart (V4SFmode, op0);
9111 op1 = gen_lowpart (V4SFmode, op1);
9112 emit_insn (gen_sse_movups (op0, op1));
9116 /* ??? Similar to above, only less clear because of quote
9117 typeless stores unquote. */
9118 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9119 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9121 op0 = gen_lowpart (V16QImode, op0);
9122 op1 = gen_lowpart (V16QImode, op1);
9123 emit_insn (gen_sse2_movdqu (op0, op1));
9127 if (TARGET_SSE2 && mode == V2DFmode)
9129 m = adjust_address (op0, DFmode, 0);
9130 emit_insn (gen_sse2_storelpd (m, op1));
9131 m = adjust_address (op0, DFmode, 8);
9132 emit_insn (gen_sse2_storehpd (m, op1));
9136 if (mode != V4SFmode)
9137 op1 = gen_lowpart (V4SFmode, op1);
9138 m = adjust_address (op0, V2SFmode, 0);
9139 emit_insn (gen_sse_storelps (m, op1));
9140 m = adjust_address (op0, V2SFmode, 8);
9141 emit_insn (gen_sse_storehps (m, op1));
9148 /* Expand a push in MODE. This is some mode for which we do not support
9149 proper push instructions, at least from the registers that we expect
9150 the value to live in. */
/* Emulates push: decrement the stack pointer by the mode size, then
   store X at the new top of stack. */
9153 ix86_expand_push (enum machine_mode mode, rtx x)
9157 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9158 GEN_INT (-GET_MODE_SIZE (mode)),
9159 stack_pointer_rtx, 1, OPTAB_DIRECT);
9160 if (tmp != stack_pointer_rtx)
9161 emit_move_insn (stack_pointer_rtx, tmp);
9163 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9164 emit_move_insn (tmp, x);
9167 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9168 destination to use for the operation. If different from the true
9169 destination in operands[0], a copy operation will be required. */
/* Canonicalizes a two-address binary op: commute to put a matching
   operand first, redirect a non-matching memory destination through a
   register, and force operands out of memory/immediates where the
   machine constraints require a register. */
9172 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9175 int matching_memory;
9176 rtx src1, src2, dst;
9182 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9183 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9184 && (rtx_equal_p (dst, src2)
9185 || immediate_operand (src1, mode)))
9192 /* If the destination is memory, and we do not have matching source
9193 operands, do things in registers. */
9194 matching_memory = 0;
9195 if (GET_CODE (dst) == MEM)
9197 if (rtx_equal_p (dst, src1))
9198 matching_memory = 1;
9199 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9200 && rtx_equal_p (dst, src2))
9201 matching_memory = 2;
9203 dst = gen_reg_rtx (mode);
9206 /* Both source operands cannot be in memory. */
9207 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9209 if (matching_memory != 2)
9210 src2 = force_reg (mode, src2);
9212 src1 = force_reg (mode, src1);
9215 /* If the operation is not commutable, source 1 cannot be a constant
9216 or non-matching memory. */
9217 if ((CONSTANT_P (src1)
9218 || (!matching_memory && GET_CODE (src1) == MEM))
9219 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9220 src1 = force_reg (mode, src1);
9222 src1 = operands[1] = src1;
9223 src2 = operands[2] = src2;
9227 /* Similarly, but assume that the destination has already been
/* Wrapper that requires the fixup not to redirect the destination:
   asserts the returned dst is still operands[0]. */
9231 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9232 enum machine_mode mode, rtx operands[])
9234 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9235 gcc_assert (dst == operands[0]);
9238 /* Attempt to expand a binary operator. Make the expansion closer to the
9239 actual machine, then just general_operand, which will allow 3 separate
9240 memory references (one output, two input) in a single insn. */
/* After operand fixup, emits SET(dst, code(src1, src2)); outside of
   reload a CCmode FLAGS_REG clobber is attached in a PARALLEL, since
   x86 arithmetic sets EFLAGS. */
9243 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9246 rtx src1, src2, dst, op, clob;
9248 dst = ix86_fixup_binary_operands (code, mode, operands);
9252 /* Emit the instruction. */
9254 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9255 if (reload_in_progress)
9257 /* Reload doesn't know about the flags register, and doesn't know that
9258 it doesn't want to clobber it. We can only do this with PLUS. */
9259 gcc_assert (code == PLUS);
9264 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9265 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9268 /* Fix up the destination if needed. */
9269 if (dst != operands[0])
9270 emit_move_insn (operands[0], dst);
9273 /* Return TRUE or FALSE depending on whether the binary operator meets the
9274 appropriate constraints. */
/* Predicate mirror of ix86_fixup_binary_operands: rejects mem/mem
   sources, constant first operand of a non-commutative op, and
   destination/source mismatches the two-address form cannot encode. */
9277 ix86_binary_operator_ok (enum rtx_code code,
9278 enum machine_mode mode ATTRIBUTE_UNUSED,
9281 /* Both source operands cannot be in memory. */
9282 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9284 /* If the operation is not commutable, source 1 cannot be a constant. */
9285 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9287 /* If the destination is memory, we must have a matching source operand. */
9288 if (GET_CODE (operands[0]) == MEM
9289 && ! (rtx_equal_p (operands[0], operands[1])
9290 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9291 && rtx_equal_p (operands[0], operands[2]))))
9293 /* If the operation is not commutable and the source 1 is memory, we must
9294 have a matching destination. */
9295 if (GET_CODE (operands[1]) == MEM
9296 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9297 && ! rtx_equal_p (operands[0], operands[1]))
9302 /* Attempt to expand a unary operator. Make the expansion closer to the
9303 actual machine, then just general_operand, which will allow 2 separate
9304 memory references (one output, one input) in a single insn. */
/* Like the binary expander: fix up operands for the two-address form,
   then emit the SET, adding a FLAGS_REG clobber except during reload or
   for NOT (the one x86 unary op that leaves EFLAGS alone). */
9307 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9310 int matching_memory;
9311 rtx src, dst, op, clob;
9316 /* If the destination is memory, and we do not have matching source
9317 operands, do things in registers. */
9318 matching_memory = 0;
9321 if (rtx_equal_p (dst, src))
9322 matching_memory = 1;
9324 dst = gen_reg_rtx (mode);
9327 /* When source operand is memory, destination must match. */
9328 if (MEM_P (src) && !matching_memory)
9329 src = force_reg (mode, src);
9331 /* Emit the instruction. */
9333 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9334 if (reload_in_progress || code == NOT)
9336 /* Reload doesn't know about the flags register, and doesn't know that
9337 it doesn't want to clobber it. */
9338 gcc_assert (code == NOT);
9343 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9344 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9347 /* Fix up the destination if needed. */
9348 if (dst != operands[0])
9349 emit_move_insn (operands[0], dst);
9352 /* Return TRUE or FALSE depending on whether the unary operator meets the
9353 appropriate constraints. */
/* Two-address form: if either operand is memory, both must be the same
   location. */
9356 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9357 enum machine_mode mode ATTRIBUTE_UNUSED,
9358 rtx operands[2] ATTRIBUTE_UNUSED)
9360 /* If one of operands is memory, source and destination must match. */
9361 if ((GET_CODE (operands[0]) == MEM
9362 || GET_CODE (operands[1]) == MEM)
9363 && ! rtx_equal_p (operands[0], operands[1]))
9368 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9369 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9370 true, then replicate the mask for all elements of the vector register.
9371 If INVERT is true, then create a mask excluding the sign bit. */
/* Builds the sign-bit constant as a double-HWI so it works whether
   HOST_WIDE_INT is 32 or 64 bits, reinterprets it as SF/DF, then packs
   it into a V4SF or V2DF constant vector loaded into a register. */
9374 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9376 enum machine_mode vec_mode;
9377 HOST_WIDE_INT hi, lo;
9382 /* Find the sign bit, sign extended to 2*HWI. */
9384 lo = 0x80000000, hi = lo < 0;
9385 else if (HOST_BITS_PER_WIDE_INT >= 64)
9386 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9388 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9393 /* Force this value into the low part of a fp vector constant. */
9394 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9395 mask = gen_lowpart (mode, mask);
9400 v = gen_rtvec (4, mask, mask, mask, mask);
9402 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9403 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9404 vec_mode = V4SFmode;
9409 v = gen_rtvec (2, mask, mask);
9411 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9412 vec_mode = V2DFmode;
9415 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9418 /* Generate code for floating point ABS or NEG. */
/* SSE path: NEG is XOR with the sign-bit mask, ABS is AND with the
   inverted mask.  x87 path: emits the plain NEG/ABS rtx, still wrapped
   with a USE of the mask and a flags clobber so later splitters can
   choose either implementation. */
9421 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9424 rtx mask, set, use, clob, dst, src;
9425 bool matching_memory;
9426 bool use_sse = false;
9427 bool vector_mode = VECTOR_MODE_P (mode);
9428 enum machine_mode elt_mode = mode;
9432 elt_mode = GET_MODE_INNER (mode);
9435 else if (TARGET_SSE_MATH)
9436 use_sse = SSE_FLOAT_MODE_P (mode);
9438 /* NEG and ABS performed with SSE use bitwise mask operations.
9439 Create the appropriate mask now. */
9441 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9448 /* If the destination is memory, and we don't have matching source
9449 operands or we're using the x87, do things in registers. */
9450 matching_memory = false;
9453 if (use_sse && rtx_equal_p (dst, src))
9454 matching_memory = true;
9456 dst = gen_reg_rtx (mode);
9458 if (MEM_P (src) && !matching_memory)
9459 src = force_reg (mode, src);
9463 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9464 set = gen_rtx_SET (VOIDmode, dst, set);
9469 set = gen_rtx_fmt_e (code, mode, src);
9470 set = gen_rtx_SET (VOIDmode, dst, set);
9473 use = gen_rtx_USE (VOIDmode, mask);
9474 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9475 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9476 gen_rtvec (3, set, use, clob)));
9482 if (dst != operands[0])
9483 emit_move_insn (operands[0], dst);
9486 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* Constant magnitude: strip its sign (abs), splat it into a vector lane,
   and emit the _const pattern needing one mask.  Variable magnitude:
   emit the _var pattern with both the sign mask and its complement. */
9489 ix86_expand_copysign (rtx operands[])
9491 enum machine_mode mode, vmode;
9492 rtx dest, op0, op1, mask, nmask;
9498 mode = GET_MODE (dest);
9499 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9501 if (GET_CODE (op0) == CONST_DOUBLE)
9505 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9506 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9508 if (op0 == CONST0_RTX (mode))
9509 op0 = CONST0_RTX (vmode);
9513 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9514 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9516 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9517 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9520 mask = ix86_build_signbit_mask (mode, 0, 0);
9523 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9525 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9529 nmask = ix86_build_signbit_mask (mode, 0, 1);
9530 mask = ix86_build_signbit_mask (mode, 0, 0);
9533 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9535 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9539 /* Deconstruct a copysign operation. Operand 0 is known to
9540 be a constant, and so has already been expanded into a vector constant. */
/* dest = (op1 & sign-mask) | constant-magnitude; the IOR is skipped
   when the magnitude is zero. */
9543 ix86_split_copysign_const (rtx operands[])
9545 enum machine_mode mode, vmode;
9546 rtx dest, op0, op1, mask, x;
9553 mode = GET_MODE (dest);
9554 vmode = GET_MODE (mask);
9556 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9557 x = gen_rtx_AND (vmode, dest, mask);
9558 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9560 if (op0 != CONST0_RTX (vmode))
9562 x = gen_rtx_IOR (vmode, dest, op0);
9563 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9567 /* Deconstruct a copysign operation. Operand 0 is variable,
9568 so we have to do two masks. */
/* dest = (op0 & ~sign-mask) | (op1 & sign-mask), with register
   assignments branching per insn alternative (which of dest/scratch
   aliases mask/nmask/op1).  op0 == op1 short-circuits to a plain move
   to avoid the aliasing alternatives producing wrong code. */
9571 ix86_split_copysign_var (rtx operands[])
9573 enum machine_mode mode, vmode;
9574 rtx dest, scratch, op0, op1, mask, nmask, x;
9577 scratch = operands[1];
9580 nmask = operands[4];
9583 mode = GET_MODE (dest);
9584 vmode = GET_MODE (mask);
9586 if (rtx_equal_p (op0, op1))
9588 /* Shouldn't happen often (it's useless, obviously), but when it does
9589 we'd generate incorrect code if we continue below. */
9590 emit_move_insn (dest, op0);
9594 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9596 gcc_assert (REGNO (op1) == REGNO (scratch));
9598 x = gen_rtx_AND (vmode, scratch, mask);
9599 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9602 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9603 x = gen_rtx_NOT (vmode, dest);
9604 x = gen_rtx_AND (vmode, x, op0);
9605 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9609 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9611 x = gen_rtx_AND (vmode, scratch, mask);
9613 else /* alternative 2,4 */
9615 gcc_assert (REGNO (mask) == REGNO (scratch));
9616 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9617 x = gen_rtx_AND (vmode, scratch, op1);
9619 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9621 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9623 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9624 x = gen_rtx_AND (vmode, dest, nmask);
9626 else /* alternative 3,4 */
9628 gcc_assert (REGNO (nmask) == REGNO (dest));
9630 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9631 x = gen_rtx_AND (vmode, dest, op0);
9633 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9636 x = gen_rtx_IOR (vmode, dest, scratch);
9637 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9640 /* Return TRUE or FALSE depending on whether the first SET in INSN
9641 has source and destination with matching CC modes, and that the
9642 CC mode is at least as constrained as REQ_MODE. */
/* CCNOmode/CCGOCmode/CCZmode etc. form a lattice of "how much of EFLAGS
   is meaningful"; this accepts the insn's mode when it guarantees at
   least what REQ_MODE demands.  NOTE(review): the per-set_mode switch
   bodies are elided in this extract. */
9645 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9648 enum machine_mode set_mode;
9650 set = PATTERN (insn);
9651 if (GET_CODE (set) == PARALLEL)
9652 set = XVECEXP (set, 0, 0);
9653 gcc_assert (GET_CODE (set) == SET);
9654 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9656 set_mode = GET_MODE (SET_DEST (set));
9660 if (req_mode != CCNOmode
9661 && (req_mode != CCmode
9662 || XEXP (SET_SRC (set), 1) != const0_rtx))
9666 if (req_mode == CCGCmode)
9670 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9674 if (req_mode == CCZmode)
9684 return (GET_MODE (SET_SRC (set)) == set_mode);
9687 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits flags = COMPARE(op0, op1) in the CC mode picked for CODE, and
   returns the (code flags 0) rtx the flag consumer (bcc/scc/cmov)
   should test. */
9690 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9692 enum machine_mode cmpmode;
9695 cmpmode = SELECT_CC_MODE (code, op0, op1);
9696 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9698 /* This is very simple, but making the interface the same as in the
9699 FP case makes the rest of the code easier. */
9700 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9701 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9703 /* Return the test that should be put into the flags user, i.e.
9704 the bcc, scc, or cmov instruction. */
9705 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9708 /* Figure out whether to use ordered or unordered fp comparisons.
9709 Return the appropriate mode to use. */
/* Under IEEE conformance every FP compare is made non-trapping
   (CCFPUmode) so it stays reversible; see the rationale below. */
9712 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9714 /* ??? In order to make all comparisons reversible, we do all comparisons
9715 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9716 all forms trapping and nontrapping comparisons, we can make inequality
9717 comparisons trapping again, since it results in better code when using
9718 FCOM based compares. */
9719 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode for comparing OP0 and OP1 with CODE:
   FP modes defer to ix86_fp_compare_mode; integer codes are grouped by
   which EFLAGS bits (ZF, CF, SF/OF) they actually consume, so later
   passes may merge compares that need compatible flag subsets. */
9723 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9725 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9726 return ix86_fp_compare_mode (code);
9729 /* Only zero flag is needed. */
9731 case NE: /* ZF!=0 */
9733 /* Codes needing carry flag. */
9734 case GEU: /* CF=0 */
9735 case GTU: /* CF=0 & ZF=0 */
9736 case LTU: /* CF=1 */
9737 case LEU: /* CF=1 | ZF=1 */
9739 /* Codes possibly doable only with sign flag when
9740 comparing against zero. */
9741 case GE: /* SF=OF or SF=0 */
9742 case LT: /* SF<>OF or SF=1 */
9743 if (op1 == const0_rtx)
9746 /* For other cases Carry flag is not required. */
9748 /* Codes doable only with sign flag when comparing
9749 against zero, but we miss jump instruction for it
9750 so we need to use relational tests against overflow
9751 that thus needs to be zero. */
9752 case GT: /* ZF=0 & SF=OF */
9753 case LE: /* ZF=1 | SF<>OF */
9754 if (op1 == const0_rtx)
9758 /* strcmp pattern do (use flags) and combine may ask us for proper
9767 /* Return the fixed registers used for condition codes. */
/* Target hook TARGET_FIXED_CONDITION_CODE_REGS; body elided in this
   extract — presumably stores FLAGS_REG/FPSR_REG into *p1/*p2. */
9770 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9777 /* If two condition code modes are compatible, return a condition code
9778 mode which is compatible with both. Otherwise, return
/* CCGCmode and CCGOCmode merge (one is a refinement of the other);
   other pairings' handling is elided in this extract. */
9781 static enum machine_mode
9782 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9787 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9790 if ((m1 == CCGCmode && m2 == CCGOCmode)
9791 || (m1 == CCGOCmode && m2 == CCGCmode))
9819 /* These are only compatible with themselves, which we already
9825 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9828 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9830 enum rtx_code swapped_code = swap_condition (code);
/* Use FCOMI when it is the cheapest strategy for either the comparison
   as written or its operand-swapped form.  */
9831 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9832 || (ix86_fp_comparison_cost (swapped_code)
9833 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9836 /* Swap, force into registers, or otherwise massage the two operands
9837 to a fp comparison. The operands are updated in place; the new
9838 comparison code is returned. */
9840 static enum rtx_code
9841 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9843 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9844 rtx op0 = *pop0, op1 = *pop1;
9845 enum machine_mode op_mode = GET_MODE (op0);
/* SSE-based math only applies to SF/DFmode when -mfpmath=sse.  */
9846 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9848 /* All of the unordered compare instructions only work on registers.
9849 The same is true of the fcomi compare instructions. The XFmode
9850 compare instructions require registers except when comparing
9851 against zero or when converting operand 1 from fixed point to
9855 && (fpcmp_mode == CCFPUmode
9856 || (op_mode == XFmode
9857 && ! (standard_80387_constant_p (op0) == 1
9858 || standard_80387_constant_p (op1) == 1)
9859 && GET_CODE (op1) != FLOAT)
9860 || ix86_use_fcomi_compare (code)))
9862 op0 = force_reg (op_mode, op0);
9863 op1 = force_reg (op_mode, op1);
9867 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9868 things around if they appear profitable, otherwise force op0
9871 if (standard_80387_constant_p (op0) == 0
9872 || (GET_CODE (op0) == MEM
9873 && ! (standard_80387_constant_p (op1) == 0
9874 || GET_CODE (op1) == MEM)))
/* Swapping the operands requires swapping the comparison code too.  */
9877 tmp = op0, op0 = op1, op1 = tmp;
9878 code = swap_condition (code);
9881 if (GET_CODE (op0) != REG)
9882 op0 = force_reg (op_mode, op0);
9884 if (CONSTANT_P (op1))
/* standard_80387_constant_p > 0 means op1 is an fldz/fld1-style
   constant; otherwise it must be spilled to the constant pool.  */
9886 int tmp = standard_80387_constant_p (op1);
9888 op1 = validize_mem (force_const_mem (op_mode, op1));
9892 op1 = force_reg (op_mode, op1);
9895 op1 = force_reg (op_mode, op1);
9899 /* Try to rearrange the comparison to make it cheaper. */
9900 if (ix86_fp_comparison_cost (code)
9901 > ix86_fp_comparison_cost (swap_condition (code))
9902 && (GET_CODE (op1) == REG || !no_new_pseudos))
9905 tmp = op0, op0 = op1, op1 = tmp;
9906 code = swap_condition (code);
9907 if (GET_CODE (op0) != REG)
9908 op0 = force_reg (op_mode, op0);
9916 /* Convert comparison codes we use to represent FP comparison to integer
9917 code that will result in proper branch. Return UNKNOWN if no such code
/* Mapping table body (GT->GTU, LT->LTU, etc.) is elided from this
   excerpt.  */
9921 ix86_fp_compare_code_to_integer (enum rtx_code code)
9950 /* Split comparison code CODE into comparisons we can do using branch
9951 instructions. BYPASS_CODE is comparison code for branch that will
9952 branch around FIRST_CODE and SECOND_CODE. If some of branches
9953 is not required, set value to UNKNOWN.
9954 We never require more than two branches. */
9957 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9958 enum rtx_code *first_code,
9959 enum rtx_code *second_code)
9962 *bypass_code = UNKNOWN;
9963 *second_code = UNKNOWN;
9965 /* The fcomi comparison sets flags as follows:
/* These codes map to a single branch directly — no bypass or second
   branch needed.  */
9975 case GT: /* GTU - CF=0 & ZF=0 */
9976 case GE: /* GEU - CF=0 */
9977 case ORDERED: /* PF=0 */
9978 case UNORDERED: /* PF=1 */
9979 case UNEQ: /* EQ - ZF=1 */
9980 case UNLT: /* LTU - CF=1 */
9981 case UNLE: /* LEU - CF=1 | ZF=1 */
9982 case LTGT: /* EQ - ZF=0 */
/* The remaining codes misbehave on unordered (NaN) operands and
   need either a bypass branch (jump around on UNORDERED) or a second
   branch (also taken on UNORDERED).  */
9984 case LT: /* LTU - CF=1 - fails on unordered */
9986 *bypass_code = UNORDERED;
9988 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9990 *bypass_code = UNORDERED;
9992 case EQ: /* EQ - ZF=1 - fails on unordered */
9994 *bypass_code = UNORDERED;
9996 case NE: /* NE - ZF=0 - fails on unordered */
9998 *second_code = UNORDERED;
10000 case UNGE: /* GEU - CF=0 - fails on unordered */
10002 *second_code = UNORDERED;
10004 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10006 *second_code = UNORDERED;
10009 gcc_unreachable ();
/* Without IEEE conformance, NaN handling branches may be dropped.  */
10011 if (!TARGET_IEEE_FP)
10013 *second_code = UNKNOWN;
10014 *bypass_code = UNKNOWN;
10018 /* Return cost of comparison done fcom + arithmetics operations on AX.
10019 All following functions do use number of instructions as a cost metrics.
10020 In future this should be tweaked to compute bytes for optimize_size and
10021 take into account performance of various instructions on various CPUs. */
10023 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Per-code cost table (bodies elided from this excerpt); non-IEEE
   takes an early path.  */
10025 if (!TARGET_IEEE_FP)
10027 /* The cost of code output by ix86_expand_fp_compare. */
10051 gcc_unreachable ();
10055 /* Return cost of comparison done using fcomi operation.
10056 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10058 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10060 enum rtx_code bypass_code, first_code, second_code;
10061 /* Return arbitrarily high cost when instruction is not supported - this
10062 prevents gcc from using it. */
10065 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fcomi + jcc = 2 insns; +1 if an extra bypass/second branch is needed
   (the != comparison yields 0 or 1).  */
10066 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10069 /* Return cost of comparison done using sahf operation.
10070 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10072 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10074 enum rtx_code bypass_code, first_code, second_code;
10075 /* Return arbitrarily high cost when instruction is not preferred - this
10076 avoids gcc from using it. */
10077 if (!TARGET_USE_SAHF && !optimize_size)
10079 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fnstsw + sahf + jcc = 3 insns; +1 for an extra branch if needed.  */
10080 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10083 /* Compute cost of the comparison done using any method.
10084 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10086 ix86_fp_comparison_cost (enum rtx_code code)
10088 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10091 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10092 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum over the three strategies (return elided here).  */
10094 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10095 if (min > sahf_cost)
10097 if (min > fcomi_cost)
10102 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Returns the comparison rtx to be used by the flags consumer (bcc,
   scc, or cmov).  *SECOND_TEST / *BYPASS_TEST receive auxiliary tests
   when one branch cannot express the condition (NaN handling).
   SCRATCH, when given, is a HImode register for fnstsw output.  */
10105 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10106 rtx *second_test, rtx *bypass_test)
10108 enum machine_mode fpcmp_mode, intcmp_mode;
10110 int cost = ix86_fp_comparison_cost (code);
10111 enum rtx_code bypass_code, first_code, second_code;
10113 fpcmp_mode = ix86_fp_compare_mode (code);
10114 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10117 *second_test = NULL_RTX;
10119 *bypass_test = NULL_RTX;
10121 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10123 /* Do fcomi/sahf based test when profitable. */
10124 if ((bypass_code == UNKNOWN || bypass_test)
10125 && (second_code == UNKNOWN || second_test)
10126 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: compare straight into the flags register.  */
10130 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10131 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a scratch, then sahf to load AH into flags.  */
10137 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10138 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10140 scratch = gen_reg_rtx (HImode);
10141 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10142 emit_insn (gen_x86_sahf_1 (scratch));
10145 /* The FP codes work out to act like unsigned. */
10146 intcmp_mode = fpcmp_mode;
10148 if (bypass_code != UNKNOWN)
10149 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10150 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10152 if (second_code != UNKNOWN)
10153 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10154 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10159 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10160 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10161 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10163 scratch = gen_reg_rtx (HImode);
10164 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10166 /* In the unordered case, we have to check C2 for NaN's, which
10167 doesn't happen to work out to anything nice combination-wise.
10168 So do some bit twiddling on the value we've got in AH to come
10169 up with an appropriate set of condition codes. */
10171 intcmp_mode = CCNOmode;
/* The masks below select FPU status bits: 0x45 = C0|C2|C3,
   0x40 = C3, 0x05 = C0|C2, 0x04 = C2, 0x01 = C0.  */
10176 if (code == GT || !TARGET_IEEE_FP)
10178 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10183 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10184 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10185 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10186 intcmp_mode = CCmode;
10192 if (code == LT && TARGET_IEEE_FP)
10194 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10195 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10196 intcmp_mode = CCmode;
10201 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10207 if (code == GE || !TARGET_IEEE_FP)
10209 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10214 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10215 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10222 if (code == LE && TARGET_IEEE_FP)
10224 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10225 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10226 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10227 intcmp_mode = CCmode;
10232 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10238 if (code == EQ && TARGET_IEEE_FP)
10240 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10241 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10242 intcmp_mode = CCmode;
10247 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10254 if (code == NE && TARGET_IEEE_FP)
10256 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10257 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10263 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10269 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10273 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10278 gcc_unreachable ();
10282 /* Return the test that should be put into the flags user, i.e.
10283 the bcc, scc, or cmov instruction. */
10284 return gen_rtx_fmt_ee (code, VOIDmode,
10285 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison held in the globals ix86_compare_op0/op1 (or a
   pre-emitted flags rtx in ix86_compare_emitted) and return the
   comparison rtx for the flags consumer.  SECOND_TEST/BYPASS_TEST are
   forwarded to ix86_expand_fp_compare for FP operands.  */
10290 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10293 op0 = ix86_compare_op0;
10294 op1 = ix86_compare_op1;
10297 *second_test = NULL_RTX;
10299 *bypass_test = NULL_RTX;
/* A compare insn was already emitted: just wrap its flags result.
   The global is one-shot, so clear it after use.  */
10301 if (ix86_compare_emitted)
10303 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10304 ix86_compare_emitted = NULL_RTX;
10306 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10307 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10308 second_test, bypass_test);
10310 ret = ix86_expand_int_compare (code, op0, op1);
10315 /* Return true if the CODE will result in nontrivial jump sequence. */
10317 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10319 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial = the condition needs a bypass or second branch.  */
10322 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10323 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL for comparison CODE of the global
   operands ix86_compare_op0/op1, dispatching on the operand mode
   (the switch head and several case labels are elided here).  */
10327 ix86_expand_branch (enum rtx_code code, rtx label)
10331 /* If we have emitted a compare insn, go straight to simple.
10332 ix86_expand_compare won't emit anything if ix86_compare_emitted
10334 if (ix86_compare_emitted)
10337 switch (GET_MODE (ix86_compare_op0))
/* Simple integer case: one compare + one jump.  */
10343 tmp = ix86_expand_compare (code, NULL, NULL);
10344 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10345 gen_rtx_LABEL_REF (VOIDmode, label),
10347 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point case.  */
10356 enum rtx_code bypass_code, first_code, second_code;
10358 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10359 &ix86_compare_op1);
10361 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10363 /* Check whether we will use the natural sequence with one jump. If
10364 so, we can expand jump early. Otherwise delay expansion by
10365 creating compound insn to not confuse optimizers. */
10366 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10369 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10370 gen_rtx_LABEL_REF (VOIDmode, label),
10371 pc_rtx, NULL_RTX, NULL_RTX);
/* Delayed expansion: wrap the jump in a PARALLEL clobbering the two
   flags registers (and a scratch for the non-fcomi case), to be split
   after reload.  */
10375 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10376 ix86_compare_op0, ix86_compare_op1);
10377 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10378 gen_rtx_LABEL_REF (VOIDmode, label),
10380 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10382 use_fcomi = ix86_use_fcomi_compare (code);
10383 vec = rtvec_alloc (3 + !use_fcomi);
10384 RTVEC_ELT (vec, 0) = tmp;
10386 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10388 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10391 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10393 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10402 /* Expand DImode branch into multiple compare+branch. */
10404 rtx lo[2], hi[2], label2;
10405 enum rtx_code code1, code2, code3;
10406 enum machine_mode submode;
/* Canonicalize: constant operand goes second.  */
10408 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10410 tmp = ix86_compare_op0;
10411 ix86_compare_op0 = ix86_compare_op1;
10412 ix86_compare_op1 = tmp;
10413 code = swap_condition (code);
10415 if (GET_MODE (ix86_compare_op0) == DImode)
10417 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10418 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10423 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10424 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10428 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10429 avoid two branches. This costs one extra insn, so disable when
10430 optimizing for size. */
10432 if ((code == EQ || code == NE)
10434 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10439 if (hi[1] != const0_rtx)
10440 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10441 NULL_RTX, 0, OPTAB_WIDEN);
10444 if (lo[1] != const0_rtx)
10445 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10446 NULL_RTX, 0, OPTAB_WIDEN);
10448 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10449 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the narrowed (submode) equality test against zero.  */
10451 ix86_compare_op0 = tmp;
10452 ix86_compare_op1 = const0_rtx;
10453 ix86_expand_branch (code, label);
10457 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10458 op1 is a constant and the low word is zero, then we can just
10459 examine the high word. */
10461 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10464 case LT: case LTU: case GE: case GEU:
10465 ix86_compare_op0 = hi[0];
10466 ix86_compare_op1 = hi[1];
10467 ix86_expand_branch (code, label);
10473 /* Otherwise, we need two or three jumps. */
10475 label2 = gen_label_rtx ();
10478 code2 = swap_condition (code);
10479 code3 = unsigned_condition (code);
10483 case LT: case GT: case LTU: case GTU:
10486 case LE: code1 = LT; code2 = GT; break;
10487 case GE: code1 = GT; code2 = LT; break;
10488 case LEU: code1 = LTU; code2 = GTU; break;
10489 case GEU: code1 = GTU; code2 = LTU; break;
10491 case EQ: code1 = UNKNOWN; code2 = NE; break;
10492 case NE: code2 = UNKNOWN; break;
10495 gcc_unreachable ();
10500 * if (hi(a) < hi(b)) goto true;
10501 * if (hi(a) > hi(b)) goto false;
10502 * if (lo(a) < lo(b)) goto true;
10506 ix86_compare_op0 = hi[0];
10507 ix86_compare_op1 = hi[1];
10509 if (code1 != UNKNOWN)
10510 ix86_expand_branch (code1, label);
10511 if (code2 != UNKNOWN)
10512 ix86_expand_branch (code2, label2);
10514 ix86_compare_op0 = lo[0];
10515 ix86_compare_op1 = lo[1];
10516 ix86_expand_branch (code3, label);
10518 if (code2 != UNKNOWN)
10519 emit_label (label2);
10524 gcc_unreachable ();
10528 /* Split branch based on floating point condition. */
/* Emits up to three conditional jumps (bypass for unordered, main,
   second) choosing TARGET1 vs TARGET2.  TMP is an optional scratch;
   PUSHED, if non-null, is a stack temporary to free afterwards.  */
10530 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10531 rtx target1, rtx target2, rtx tmp, rtx pushed)
10533 rtx second, bypass;
10534 rtx label = NULL_RTX;
10536 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through edge (target2) is pc_rtx.  */
10539 if (target2 != pc_rtx)
10542 code = reverse_condition_maybe_unordered (code);
10547 condition = ix86_expand_fp_compare (code, op1, op2,
10548 tmp, &second, &bypass);
10550 /* Remove pushed operand from stack. */
10552 ix86_free_from_memory (GET_MODE (pushed));
10554 if (split_branch_probability >= 0)
10556 /* Distribute the probabilities across the jumps.
10557 Assume the BYPASS and SECOND to be always test
10559 probability = split_branch_probability;
10561 /* Value of 1 is low enough to make no need for probability
10562 to be updated. Later we may run some experiments and see
10563 if unordered values are more frequent in practice. */
10565 bypass_probability = 1;
10567 second_probability = 1;
/* Bypass branch jumps over the main test on unordered operands.  */
10569 if (bypass != NULL_RTX)
10571 label = gen_label_rtx ();
10572 i = emit_jump_insn (gen_rtx_SET
10574 gen_rtx_IF_THEN_ELSE (VOIDmode,
10576 gen_rtx_LABEL_REF (VOIDmode,
10579 if (bypass_probability >= 0)
10581 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10582 GEN_INT (bypass_probability),
10585 i = emit_jump_insn (gen_rtx_SET
10587 gen_rtx_IF_THEN_ELSE (VOIDmode,
10588 condition, target1, target2)));
10589 if (probability >= 0)
10591 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10592 GEN_INT (probability),
10594 if (second != NULL_RTX)
10596 i = emit_jump_insn (gen_rtx_SET
10598 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10600 if (second_probability >= 0)
10602 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10603 GEN_INT (second_probability),
10606 if (label != NULL_RTX)
10607 emit_label (label);
/* Expand a setcc of comparison CODE into QImode register DEST.
   Returns 1 on success (DONE), 0 on failure (FAIL) so the caller's
   expander can fall back.  */
10611 ix86_expand_setcc (enum rtx_code code, rtx dest)
10613 rtx ret, tmp, tmpreg, equiv;
10614 rtx second_test, bypass_test;
/* Double-word compares are handled by the branch path, not setcc.  */
10616 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10617 return 0; /* FAIL */
10619 gcc_assert (GET_MODE (dest) == QImode);
10621 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10622 PUT_MODE (ret, QImode);
10627 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* An FP compare may need two setcc's combined with and/or (bypass is
   reversed first so both results can be merged).  */
10628 if (bypass_test || second_test)
10630 rtx test = second_test;
10632 rtx tmp2 = gen_reg_rtx (QImode);
10635 gcc_assert (!second_test);
10636 test = bypass_test;
10638 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10640 PUT_MODE (test, QImode);
10641 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10644 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10646 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10649 /* Attach a REG_EQUAL note describing the comparison result. */
10650 if (ix86_compare_op0 && ix86_compare_op1)
10652 equiv = simplify_gen_relational (code, QImode,
10653 GET_MODE (ix86_compare_op0),
10654 ix86_compare_op0, ix86_compare_op1);
10655 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10658 return 1; /* DONE */
10661 /* Expand comparison setting or clearing carry flag. Return true when
10662 successful and set pop for the operation. */
10664 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10666 enum machine_mode mode =
10667 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10669 /* Do not handle DImode compares that go through special path. Also we can't
10670 deal with FP compares yet. This is possible to add. */
10671 if (mode == (TARGET_64BIT ? TImode : DImode))
10673 if (FLOAT_MODE_P (mode))
10675 rtx second_test = NULL, bypass_test = NULL;
10676 rtx compare_op, compare_seq;
10678 /* Shortcut: following common codes never translate into carry flag compares. */
10679 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10680 || code == ORDERED || code == UNORDERED)
10683 /* These comparisons require zero flag; swap operands so they won't. */
10684 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10685 && !TARGET_IEEE_FP)
10690 code = swap_condition (code);
10693 /* Try to expand the comparison and verify that we end up with carry flag
10694 based comparison. This is fails to be true only when we decide to expand
10695 comparison using arithmetic that is not too common scenario. */
10697 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10698 &second_test, &bypass_test);
10699 compare_seq = get_insns ();
/* Auxiliary tests mean the result is not a plain carry-flag test.  */
10702 if (second_test || bypass_test)
10704 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10705 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10706 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10708 code = GET_CODE (compare_op);
10709 if (code != LTU && code != GEU)
10711 emit_insn (compare_seq);
10715 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/OP1 so the test becomes LTU or GEU
   (i.e. a pure carry-flag comparison).  */
10723 /* Convert a==0 into (unsigned)a<1. */
10726 if (op1 != const0_rtx)
10729 code = (code == EQ ? LTU : GEU);
10732 /* Convert a>b into b<a or a>=b-1. */
10735 if (GET_CODE (op1) == CONST_INT)
10737 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10738 /* Bail out on overflow. We still can swap operands but that
10739 would force loading of the constant into register. */
10740 if (op1 == const0_rtx
10741 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10743 code = (code == GTU ? GEU : LTU);
10750 code = (code == GTU ? LTU : GEU);
10754 /* Convert a>=0 into (unsigned)a<0x80000000. */
10757 if (mode == DImode || op1 != const0_rtx)
10759 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10760 code = (code == LT ? GEU : LTU);
10764 if (mode == DImode || op1 != constm1_rtx)
10766 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10767 code = (code == LE ? GEU : LTU);
10773 /* Swapping operands may cause constant to appear as first operand. */
10774 if (!nonimmediate_operand (op0, VOIDmode))
10776 if (no_new_pseudos)
10778 op0 = force_reg (mode, op0);
10780 ix86_compare_op0 = op0;
10781 ix86_compare_op1 = op1;
10782 *pop = ix86_expand_compare (code, NULL, NULL);
10783 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1]
   (a comparison) ? operands[2] : operands[3].  Tries a series of
   branchless strategies (sbb/setcc arithmetic, lea forms, and/or
   masking) before falling back to a cmov or recursion.  Returns 1 on
   DONE, 0 on FAIL so the pattern expander can fall back.  */
10788 ix86_expand_int_movcc (rtx operands[])
10790 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10791 rtx compare_seq, compare_op;
10792 rtx second_test, bypass_test;
10793 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray doubled semicolon below — the second ';' is a
   harmless empty statement and could be dropped.  */
10794 bool sign_bit_compare_p = false;;
10797 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10798 compare_seq = get_insns ();
10801 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 / x>-1 / x<=-1 are sign-bit tests, best done by shifts.  */
10803 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10804 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10805 sign_bit_compare_p = true;
10807 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10808 HImode insns, we'd be swallowed in word prefix ops. */
10810 if ((mode != HImode || TARGET_FAST_PREFIX)
10811 && (mode != (TARGET_64BIT ? TImode : DImode))
10812 && GET_CODE (operands[2]) == CONST_INT
10813 && GET_CODE (operands[3]) == CONST_INT)
10815 rtx out = operands[0];
10816 HOST_WIDE_INT ct = INTVAL (operands[2]);
10817 HOST_WIDE_INT cf = INTVAL (operands[3]);
10818 HOST_WIDE_INT diff;
10821 /* Sign bit compares are better done using shifts than we do by using
10823 if (sign_bit_compare_p
10824 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10825 ix86_compare_op1, &compare_op))
10827 /* Detect overlap between destination and compare sources. */
10830 if (!sign_bit_compare_p)
10832 bool fpcmp = false;
10834 compare_code = GET_CODE (compare_op);
10836 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10837 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10840 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10843 /* To simplify rest of code, restrict to the GEU case. */
10844 if (compare_code == LTU)
10846 HOST_WIDE_INT tmp = ct;
10849 compare_code = reverse_condition (compare_code);
10850 code = reverse_condition (code);
10855 PUT_CODE (compare_op,
10856 reverse_condition_maybe_unordered
10857 (GET_CODE (compare_op)));
10859 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10863 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10864 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10865 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb-style).  */
10867 if (mode == DImode)
10868 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10870 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10874 if (code == GT || code == GE)
10875 code = reverse_condition (code);
10878 HOST_WIDE_INT tmp = ct;
10883 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10884 ix86_compare_op1, VOIDmode, 0, -1);
/* Turn the 0/-1 mask into ct/cf via add / or / not+add / and+add,
   depending on the constants (case heads elided from this excerpt).  */
10897 tmp = expand_simple_binop (mode, PLUS,
10899 copy_rtx (tmp), 1, OPTAB_DIRECT);
10910 tmp = expand_simple_binop (mode, IOR,
10912 copy_rtx (tmp), 1, OPTAB_DIRECT);
10914 else if (diff == -1 && ct)
10924 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10926 tmp = expand_simple_binop (mode, PLUS,
10927 copy_rtx (tmp), GEN_INT (cf),
10928 copy_rtx (tmp), 1, OPTAB_DIRECT);
10936 * andl cf - ct, dest
10946 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10949 tmp = expand_simple_binop (mode, AND,
10951 gen_int_mode (cf - ct, mode),
10952 copy_rtx (tmp), 1, OPTAB_DIRECT);
10954 tmp = expand_simple_binop (mode, PLUS,
10955 copy_rtx (tmp), GEN_INT (ct),
10956 copy_rtx (tmp), 1, OPTAB_DIRECT);
10959 if (!rtx_equal_p (tmp, out))
10960 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10962 return 1; /* DONE */
10968 tmp = ct, ct = cf, cf = tmp;
10970 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10972 /* We may be reversing unordered compare to normal compare, that
10973 is not valid in general (we may convert non-trapping condition
10974 to trapping one), however on i386 we currently emit all
10975 comparisons unordered. */
10976 compare_code = reverse_condition_maybe_unordered (compare_code);
10977 code = reverse_condition_maybe_unordered (code);
10981 compare_code = reverse_condition (compare_code);
10982 code = reverse_condition (code);
10986 compare_code = UNKNOWN;
10987 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10988 && GET_CODE (ix86_compare_op1) == CONST_INT)
10990 if (ix86_compare_op1 == const0_rtx
10991 && (code == LT || code == GE))
10992 compare_code = code;
10993 else if (ix86_compare_op1 == constm1_rtx)
10997 else if (code == GT)
11002 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11003 if (compare_code != UNKNOWN
11004 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11005 && (cf == -1 || ct == -1))
11007 /* If lea code below could be used, only optimize
11008 if it results in a 2 insn sequence. */
11010 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11011 || diff == 3 || diff == 5 || diff == 9)
11012 || (compare_code == LT && ct == -1)
11013 || (compare_code == GE && cf == -1))
11016 * notl op1 (if necessary)
11024 code = reverse_condition (code);
11027 out = emit_store_flag (out, code, ix86_compare_op0,
11028 ix86_compare_op1, VOIDmode, 0, -1);
11030 out = expand_simple_binop (mode, IOR,
11032 out, 1, OPTAB_DIRECT);
11033 if (out != operands[0])
11034 emit_move_insn (operands[0], out);
11036 return 1; /* DONE */
/* lea strategy: setcc 0/1, then scale/offset with lea arithmetic;
   valid only for the scales lea can encode.  */
11041 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11042 || diff == 3 || diff == 5 || diff == 9)
11043 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11045 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11051 * lea cf(dest*(ct-cf)),dest
11055 * This also catches the degenerate setcc-only case.
11061 out = emit_store_flag (out, code, ix86_compare_op0,
11062 ix86_compare_op1, VOIDmode, 0, 1);
11065 /* On x86_64 the lea instruction operates on Pmode, so we need
11066 to get arithmetics done in proper mode to match. */
11068 tmp = copy_rtx (out);
11072 out1 = copy_rtx (out);
11073 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11077 tmp = gen_rtx_PLUS (mode, tmp, out1);
11083 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11086 if (!rtx_equal_p (tmp, out))
11089 out = force_operand (tmp, copy_rtx (out));
11091 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11093 if (!rtx_equal_p (out, operands[0]))
11094 emit_move_insn (operands[0], copy_rtx (out));
11096 return 1; /* DONE */
11100 * General case: Jumpful:
11101 * xorl dest,dest cmpl op1, op2
11102 * cmpl op1, op2 movl ct, dest
11103 * setcc dest jcc 1f
11104 * decl dest movl cf, dest
11105 * andl (cf-ct),dest 1:
11108 * Size 20. Size 14.
11110 * This is reasonably steep, but branch mispredict costs are
11111 * high on modern cpus, so consider failing only if optimizing
11115 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11116 && BRANCH_COST >= 2)
11122 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11123 /* We may be reversing unordered compare to normal compare,
11124 that is not valid in general (we may convert non-trapping
11125 condition to trapping one), however on i386 we currently
11126 emit all comparisons unordered. */
11127 code = reverse_condition_maybe_unordered (code);
11130 code = reverse_condition (code);
11131 if (compare_code != UNKNOWN)
11132 compare_code = reverse_condition (compare_code);
11136 if (compare_code != UNKNOWN)
11138 /* notl op1 (if needed)
11143 For x < 0 (resp. x <= -1) there will be no notl,
11144 so if possible swap the constants to get rid of the
11146 True/false will be -1/0 while code below (store flag
11147 followed by decrement) is 0/-1, so the constants need
11148 to be exchanged once more. */
11150 if (compare_code == GE || !cf)
11152 code = reverse_condition (code);
11157 HOST_WIDE_INT tmp = cf;
11162 out = emit_store_flag (out, code, ix86_compare_op0,
11163 ix86_compare_op1, VOIDmode, 0, -1);
11167 out = emit_store_flag (out, code, ix86_compare_op0,
11168 ix86_compare_op1, VOIDmode, 0, 1);
11170 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11171 copy_rtx (out), 1, OPTAB_DIRECT);
11174 out = expand_simple_binop (mode, AND, copy_rtx (out),
11175 gen_int_mode (cf - ct, mode),
11176 copy_rtx (out), 1, OPTAB_DIRECT);
11178 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11179 copy_rtx (out), 1, OPTAB_DIRECT);
11180 if (!rtx_equal_p (out, operands[0]))
11181 emit_move_insn (operands[0], copy_rtx (out));
11183 return 1; /* DONE */
11187 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11189 /* Try a few things more with specific constants and a variable. */
11192 rtx var, orig_out, out, tmp;
11194 if (BRANCH_COST <= 2)
11195 return 0; /* FAIL */
11197 /* If one of the two operands is an interesting constant, load a
11198 constant with the above and mask it in with a logical operation. */
11200 if (GET_CODE (operands[2]) == CONST_INT)
11203 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11204 operands[3] = constm1_rtx, op = and_optab;
11205 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11206 operands[3] = const0_rtx, op = ior_optab;
11208 return 0; /* FAIL */
11210 else if (GET_CODE (operands[3]) == CONST_INT)
11213 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11214 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): suspected typo on the next line — by symmetry with
   the operands[2] branch above it should test
   operands[2] != const0_rtx; testing operands[3] is vacuous when
   INTVAL (operands[3]) == -1.  Confirm against upstream history.  */
11215 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11216 operands[2] = const0_rtx, op = ior_optab;
11218 return 0; /* FAIL */
11221 return 0; /* FAIL */
11223 orig_out = operands[0];
11224 tmp = gen_reg_rtx (mode);
11227 /* Recurse to get the constant loaded. */
11228 if (ix86_expand_int_movcc (operands) == 0)
11229 return 0; /* FAIL */
11231 /* Mask in the interesting variable. */
11232 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11234 if (!rtx_equal_p (out, orig_out))
11235 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11237 return 1; /* DONE */
11241 * For comparison with above,
/* cmov fallback: force operands into registers as required, handle
   destination overlap with copies, then emit the IF_THEN_ELSE sets.  */
11251 if (! nonimmediate_operand (operands[2], mode))
11252 operands[2] = force_reg (mode, operands[2]);
11253 if (! nonimmediate_operand (operands[3], mode))
11254 operands[3] = force_reg (mode, operands[3]);
11256 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11258 rtx tmp = gen_reg_rtx (mode);
11259 emit_move_insn (tmp, operands[3]);
11262 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11264 rtx tmp = gen_reg_rtx (mode);
11265 emit_move_insn (tmp, operands[2]);
11269 if (! register_operand (operands[2], VOIDmode)
11271 || ! register_operand (operands[3], VOIDmode)))
11272 operands[2] = force_reg (mode, operands[2]);
11275 && ! register_operand (operands[3], VOIDmode))
11276 operands[3] = force_reg (mode, operands[3]);
11278 emit_insn (compare_seq);
11279 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11280 gen_rtx_IF_THEN_ELSE (mode,
11281 compare_op, operands[2],
11284 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11285 gen_rtx_IF_THEN_ELSE (mode,
11287 copy_rtx (operands[3]),
11288 copy_rtx (operands[0]))));
11290 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11291 gen_rtx_IF_THEN_ELSE (mode,
11293 copy_rtx (operands[2]),
11294 copy_rtx (operands[0]))));
11296 return 1; /* DONE */
11299 /* Swap, force into registers, or otherwise massage the two operands
11300 to an sse comparison with a mask result. Thus we differ a bit from
11301 ix86_prepare_fp_compare_args which expects to produce a flags result.
11303 The DEST operand exists to help determine whether to commute commutative
11304 operators. The POP0/POP1 operands are updated in place. The new
11305 comparison code is returned, or UNKNOWN if not implementable. */
11307 static enum rtx_code
11308 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11309 rtx *pop0, rtx *pop1)
/* NOTE(review): the switch over CODE and the actual operand-swap
   statements are elided in this view; comments below describe only
   the visible lines.  */
11317 /* We have no LTGT as an operator. We could implement it with
11318 NE & ORDERED, but this requires an extra temporary. It's
11319 not clear that it's worth it. */
11326 /* These are supported directly. */
11333 /* For commutative operators, try to canonicalize the destination
11334 operand to be first in the comparison - this helps reload to
11335 avoid extra moves. */
/* Presumably no swap is needed when DEST is absent or already
   distinct from *POP1 -- guarded body elided; confirm in full source.  */
11336 if (!dest || !rtx_equal_p (dest, *pop1))
11344 /* These are not supported directly. Swap the comparison operands
11345 to transform into something that is supported. */
/* swap_condition mirrors the predicate (e.g. GE -> LE) so that the
   swapped operand order maps onto a supported SSE comparison.  */
11349 code = swap_condition (code);
/* Every comparison code must have been handled by one of the cases
   above.  */
11353 gcc_unreachable ();
11359 /* Detect conditional moves that exactly match min/max operational
11360 semantics. Note that this is IEEE safe, as long as we don't
11361 interchange the operands.
11363 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11364 and TRUE if the operation is successful and instructions are emitted. */
11367 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11368 rtx cmp_op1, rtx if_true, rtx if_false)
11370 enum machine_mode mode;
11376 else if (code == UNGE)
11379 if_true = if_false;
/* Match the (cmp_op0 <op> cmp_op1 ? a : b) pattern against the two
   operand orders that correspond to a min or a max.  */
11385 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11387 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11392 mode = GET_MODE (dest);
11394 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11395 but MODE may be a vector mode and thus not appropriate. */
11396 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-safe path: wrap the operands in an UNSPEC so the operand order
   (and hence NaN/signed-zero behavior of the hardware min/max insn)
   is preserved.  */
11398 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11401 if_true = force_reg (mode, if_true);
11402 v = gen_rtvec (2, if_true, if_false);
11403 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: a plain SMIN/SMAX rtx is sufficient.  */
11407 code = is_min ? SMIN : SMAX;
11408 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11411 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11415 /* Expand an sse vector comparison. Return the register with the result. */
11418 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11419 rtx op_true, rtx op_false)
11421 enum machine_mode mode = GET_MODE (dest);
/* Legitimize the comparison operands: op0 must be a register, op1 a
   register or memory.  */
11424 cmp_op0 = force_reg (mode, cmp_op0);
11425 if (!nonimmediate_operand (cmp_op1, mode))
11426 cmp_op1 = force_reg (mode, cmp_op1);
/* If writing the mask into DEST would clobber one of the select arms,
   compute it into a fresh register instead (see header comment: the
   register holding the result is returned).  */
11429 || reg_overlap_mentioned_p (dest, op_true)
11430 || reg_overlap_mentioned_p (dest, op_false))
11431 dest = gen_reg_rtx (mode);
/* Emit DEST = (cmp_op0 CODE cmp_op1) as a vector comparison producing
   an all-ones/all-zeros mask.  */
11433 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11434 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11439 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11440 operations. This is used for both scalar and vector conditional moves. */
11443 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11445 enum machine_mode mode = GET_MODE (dest);
/* Special case: selecting against zero needs only a single AND,
   DEST = CMP & OP_TRUE.  */
11448 if (op_false == CONST0_RTX (mode))
11450 op_true = force_reg (mode, op_true);
11451 x = gen_rtx_AND (mode, cmp, op_true);
11452 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: DEST = ~CMP & OP_FALSE when the true arm is zero.  */
11454 else if (op_true == CONST0_RTX (mode))
11456 op_false = force_reg (mode, op_false);
11457 x = gen_rtx_NOT (mode, cmp);
11458 x = gen_rtx_AND (mode, x, op_false);
11459 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* General case: DEST = (OP_TRUE & CMP) | (OP_FALSE & ~CMP), built with
   two temporaries and an IOR.  */
11463 op_true = force_reg (mode, op_true);
11464 op_false = force_reg (mode, op_false);
11466 t2 = gen_reg_rtx (mode);
11468 t3 = gen_reg_rtx (mode);
11472 x = gen_rtx_AND (mode, op_true, cmp);
11473 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11475 x = gen_rtx_NOT (mode, cmp);
11476 x = gen_rtx_AND (mode, x, op_false);
11477 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11479 x = gen_rtx_IOR (mode, t3, t2);
11480 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11484 /* Expand a floating-point conditional move. Return true if successful. */
11487 ix86_expand_fp_movcc (rtx operands[])
11489 enum machine_mode mode = GET_MODE (operands[0]);
11490 enum rtx_code code = GET_CODE (operands[1]);
11491 rtx tmp, compare_op, second_test, bypass_test;
/* SSE path: expand via compare-mask + logical-select, since SSE has no
   conditional-move instruction.  */
11493 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11495 enum machine_mode cmode;
11497 /* Since we've no cmove for sse registers, don't force bad register
11498 allocation just to gain access to it. Deny movcc when the
11499 comparison mode doesn't match the move mode. */
11500 cmode = GET_MODE (ix86_compare_op0);
11501 if (cmode == VOIDmode)
11502 cmode = GET_MODE (ix86_compare_op1);
11506 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11508 &ix86_compare_op1);
11509 if (code == UNKNOWN)
/* Prefer a direct min/max instruction when the cmov matches that
   pattern exactly.  */
11512 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11513 ix86_compare_op1, operands[2],
11517 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11518 ix86_compare_op1, operands[2], operands[3]);
11519 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11523 /* The floating point conditional move instructions don't directly
11524 support conditions resulting from a signed integer comparison. */
11526 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11528 /* The floating point conditional move instructions don't directly
11529 support signed integer comparisons. */
/* Fall back: materialize the condition into a QImode 0/1 value with
   setcc and re-compare that against zero, which fcmov can use.  */
11531 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11533 gcc_assert (!second_test && !bypass_test);
11534 tmp = gen_reg_rtx (QImode);
11535 ix86_expand_setcc (code, tmp);
11537 ix86_compare_op0 = tmp;
11538 ix86_compare_op1 = const0_rtx;
11539 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* When the extra bypass/second tests are needed and an input overlaps
   the destination, copy that input to a temporary first so the chained
   cmovs below read the original value.  */
11541 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11543 tmp = gen_reg_rtx (mode);
11544 emit_move_insn (tmp, operands[3]);
11547 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11549 tmp = gen_reg_rtx (mode);
11550 emit_move_insn (tmp, operands[2]);
/* Primary conditional move, optionally followed by fix-ups for the
   bypass and second tests produced by ix86_expand_compare.  */
11554 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11555 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11556 operands[2], operands[3])));
11558 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11559 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11560 operands[3], operands[0])));
11562 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11563 gen_rtx_IF_THEN_ELSE (mode, second_test,
11564 operands[2], operands[0])));
11569 /* Expand a floating-point vector conditional move; a vcond operation
11570 rather than a movcc operation. */
11573 ix86_expand_fp_vcond (rtx operands[])
11575 enum rtx_code code = GET_CODE (operands[3]);
/* Massage the comparison into an SSE-supported form; give up if the
   predicate cannot be implemented.  */
11578 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11579 &operands[4], &operands[5]);
11580 if (code == UNKNOWN)
/* Use a hardware min/max when the vcond matches that idiom.  */
11583 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11584 operands[5], operands[1], operands[2]))
/* Otherwise: vector compare to a mask, then logical blend.  */
11587 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11588 operands[1], operands[2]);
11589 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11593 /* Expand a signed integral vector conditional move. */
11596 ix86_expand_int_vcond (rtx operands[])
11598 enum machine_mode mode = GET_MODE (operands[0]);
11599 enum rtx_code code = GET_CODE (operands[3]);
/* NEGATE records that the true/false arms were exchanged during
   canonicalization (see the [1+negate]/[2-negate] indexing below).  */
11600 bool negate = false;
11603 cop0 = operands[4];
11604 cop1 = operands[5];
11606 /* Canonicalize the comparison to EQ, GT, GTU. */
11617 code = reverse_condition (code);
11623 code = reverse_condition (code);
11629 code = swap_condition (code);
11630 x = cop0, cop0 = cop1, cop1 = x;
11634 gcc_unreachable ();
11637 /* Unsigned parallel compare is not supported by the hardware. Play some
11638 tricks to turn this into a signed comparison against 0. */
11641 cop0 = force_reg (mode, cop0);
11649 /* Perform a parallel modulo subtraction. */
11650 t1 = gen_reg_rtx (mode);
11651 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11653 /* Extract the original sign bit of op0. */
/* NOTE(review): -0x80000000 negates an unsigned literal on ILP32
   hosts, so the HOST_WIDE_INT value passed to GEN_INT depends on host
   int width; gen_int_mode (0x80000000, SImode) would be unambiguous --
   confirm against the full source and host assumptions.  */
11654 mask = GEN_INT (-0x80000000);
11655 mask = gen_rtx_CONST_VECTOR (mode,
11656 gen_rtvec (4, mask, mask, mask, mask));
11657 mask = force_reg (mode, mask);
11658 t2 = gen_reg_rtx (mode);
11659 emit_insn (gen_andv4si3 (t2, cop0, mask));
11661 /* XOR it back into the result of the subtraction. This results
11662 in the sign bit set iff we saw unsigned underflow. */
11663 x = gen_reg_rtx (mode);
11664 emit_insn (gen_xorv4si3 (x, t1, t2));
11672 /* Perform a parallel unsigned saturating subtraction. */
11673 x = gen_reg_rtx (mode);
11674 emit_insn (gen_rtx_SET (VOIDmode, x,
11675 gen_rtx_US_MINUS (mode, cop0, cop1)));
11682 gcc_unreachable ();
/* After the tricks above the comparison is against a zero vector.  */
11686 cop1 = CONST0_RTX (mode);
11689 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11690 operands[1+negate], operands[2-negate]);
11692 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11693 operands[2-negate]);
11697 /* Expand conditional increment or decrement using adc/sbb instructions.
11698 The default case using setcc followed by the conditional move can be
11699 done by generic code. */
11701 ix86_expand_int_addcc (rtx operands[])
11703 enum rtx_code code = GET_CODE (operands[1]);
11705 rtx val = const0_rtx;
11706 bool fpcmp = false;
11707 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1/-1 adjustments can be done with adc/sbb of zero.  */
11709 if (operands[3] != const1_rtx
11710 && operands[3] != constm1_rtx)
/* The comparison must be expressible through the carry flag.  */
11712 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11713 ix86_compare_op1, &compare_op))
11715 code = GET_CODE (compare_op);
11717 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11718 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11721 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons need unordered-aware reversal; integer ones do not.  */
11728 PUT_CODE (compare_op,
11729 reverse_condition_maybe_unordered
11730 (GET_CODE (compare_op)));
11732 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11734 PUT_MODE (compare_op, mode);
11736 /* Construct either adc or sbb insn. */
/* sbb subtracts the carry; adc adds it -- pick based on whether the
   condition is LTU and whether we are decrementing.  */
11737 if ((code == LTU) == (operands[3] == constm1_rtx))
11739 switch (GET_MODE (operands[0]))
11742 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11745 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11748 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11751 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11754 gcc_unreachable ();
11759 switch (GET_MODE (operands[0]))
11762 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11765 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11768 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11771 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11774 gcc_unreachable ();
11777 return 1; /* DONE */
11781 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11782 works for floating point parameters and non-offsettable memories.
11783 For pushes, it returns just stack offsets; the values will be saved
11784 in the right order. Maximally three parts are generated. */
11787 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Number of word-sized parts: 3 for XFmode on 32-bit (12 bytes), else
   mode size divided by the word size.  */
11792 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11794 size = (GET_MODE_SIZE (mode) + 4) / 8;
11796 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11797 gcc_assert (size >= 2 && size <= 3);
11799 /* Optimize constant pool reference to immediates. This is used by fp
11800 moves, that force all constants to memory to allow combining. */
11801 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11803 rtx tmp = maybe_get_pool_constant (operand);
11808 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11810 /* The only non-offsetable memories we handle are pushes. */
11811 int ok = push_operand (operand, VOIDmode)
11815 operand = copy_rtx (operand);
11816 PUT_MODE (operand, Pmode);
11817 parts[0] = parts[1] = parts[2] = operand;
11821 if (GET_CODE (operand) == CONST_VECTOR)
11823 enum machine_mode imode = int_mode_for_mode (mode);
11824 /* Caution: if we looked through a constant pool memory above,
11825 the operand may actually have a different mode now. That's
11826 ok, since we want to pun this all the way back to an integer. */
11827 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11828 gcc_assert (operand != NULL);
/* 32-bit target: split into SImode parts.  */
11834 if (mode == DImode)
11835 split_di (&operand, 1, &parts[0], &parts[1]);
11838 if (REG_P (operand))
/* Hard-register splitting is only valid after reload, when REGNO+i
   addresses consecutive hard registers.  */
11840 gcc_assert (reload_completed);
11841 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11842 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11844 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11846 else if (offsettable_memref_p (operand))
11848 operand = adjust_address (operand, SImode, 0);
11849 parts[0] = operand;
11850 parts[1] = adjust_address (operand, SImode, 4);
11852 parts[2] = adjust_address (operand, SImode, 8);
11854 else if (GET_CODE (operand) == CONST_DOUBLE)
11859 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11863 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11864 parts[2] = gen_int_mode (l[2], SImode);
11867 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11870 gcc_unreachable ();
11872 parts[1] = gen_int_mode (l[1], SImode);
11873 parts[0] = gen_int_mode (l[0], SImode);
11876 gcc_unreachable ();
/* 64-bit target: split into DImode parts (plus an SImode or DImode
   upper part for XFmode/TFmode).  */
11881 if (mode == TImode)
11882 split_ti (&operand, 1, &parts[0], &parts[1]);
11883 if (mode == XFmode || mode == TFmode)
11885 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11886 if (REG_P (operand))
11888 gcc_assert (reload_completed);
11889 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11890 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11892 else if (offsettable_memref_p (operand))
11894 operand = adjust_address (operand, DImode, 0);
11895 parts[0] = operand;
11896 parts[1] = adjust_address (operand, upper_mode, 8);
11898 else if (GET_CODE (operand) == CONST_DOUBLE)
11903 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11904 real_to_target (l, &r, mode);
11906 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11907 if (HOST_BITS_PER_WIDE_INT >= 64)
11910 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11911 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11914 parts[0] = immed_double_const (l[0], l[1], DImode);
11916 if (upper_mode == SImode)
11917 parts[1] = gen_int_mode (l[2], SImode);
11918 else if (HOST_BITS_PER_WIDE_INT >= 64)
11921 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11922 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11925 parts[1] = immed_double_const (l[2], l[3], DImode);
11928 gcc_unreachable ();
11935 /* Emit insns to perform a move or push of DI, DF, and XF values.
11936 Return false when normal moves are needed; true when all required
11937 insns have been emitted. Operands 2-4 contain the input values
11938 in the correct order; operands 5-7 contain the output values. */
11941 ix86_split_long_move (rtx operands[])
11946 int collisions = 0;
11947 enum machine_mode mode = GET_MODE (operands[0]);
11949 /* The DFmode expanders may ask us to move double.
11950 For 64bit target this is single move. By hiding the fact
11951 here we simplify i386.md splitters. */
11952 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11954 /* Optimize constant pool reference to immediates. This is used by
11955 fp moves, that force all constants to memory to allow combining. */
11957 if (GET_CODE (operands[1]) == MEM
11958 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11959 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11960 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11961 if (push_operand (operands[0], VOIDmode))
11963 operands[0] = copy_rtx (operands[0]);
11964 PUT_MODE (operands[0], Pmode);
/* 8-byte value on a 64-bit target: a single DImode move suffices.  */
11967 operands[0] = gen_lowpart (DImode, operands[0]);
11968 operands[1] = gen_lowpart (DImode, operands[1]);
11969 emit_move_insn (operands[0], operands[1]);
11973 /* The only non-offsettable memory we handle is push. */
11974 if (push_operand (operands[0], VOIDmode))
11977 gcc_assert (GET_CODE (operands[0]) != MEM
11978 || offsettable_memref_p (operands[0]));
/* Decompose source and destination into their word-sized parts.  */
11980 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11981 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11983 /* When emitting push, take care for source operands on the stack. */
11984 if (push && GET_CODE (operands[1]) == MEM
11985 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11988 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11989 XEXP (part[1][2], 0));
11990 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11991 XEXP (part[1][1], 0));
11994 /* We need to do copy in the right order in case an address register
11995 of the source overlaps the destination. */
11996 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11998 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12000 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12003 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12006 /* Collision in the middle part can be handled by reordering. */
12007 if (collisions == 1 && nparts == 3
12008 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12011 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12012 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12015 /* If there are more collisions, we can't handle it by reordering.
12016 Do an lea to the last part and use only one colliding move. */
12017 else if (collisions > 1)
12023 base = part[0][nparts - 1];
12025 /* Handle the case when the last part isn't valid for lea.
12026 Happens in 64-bit mode storing the 12-byte XFmode. */
12027 if (GET_MODE (base) != Pmode)
12028 base = gen_rtx_REG (Pmode, REGNO (base));
/* Rebase every source part on the materialized address so only the
   lea itself can collide with the destination.  */
12030 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12031 part[1][0] = replace_equiv_address (part[1][0], base);
12032 part[1][1] = replace_equiv_address (part[1][1],
12033 plus_constant (base, UNITS_PER_WORD));
12035 part[1][2] = replace_equiv_address (part[1][2],
12036 plus_constant (base, 8));
/* Push path: XFmode pushes on TARGET_128BIT_LONG_DOUBLE need 4 extra
   bytes of stack adjustment.  */
12046 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12047 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12048 emit_move_insn (part[0][2], part[1][2]);
12053 /* In 64bit mode we don't have 32bit push available. In case this is
12054 register, it is OK - we will just use larger counterpart. We also
12055 retype memory - these come from an attempt to avoid REX prefix on
12056 moving of second half of TFmode value. */
12057 if (GET_MODE (part[1][1]) == SImode)
12059 switch (GET_CODE (part[1][1]))
12062 part[1][1] = adjust_address (part[1][1], DImode, 0);
12066 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12070 gcc_unreachable ();
12073 if (GET_MODE (part[1][0]) == SImode)
12074 part[1][0] = part[1][1];
12077 emit_move_insn (part[0][1], part[1][1]);
12078 emit_move_insn (part[0][0], part[1][0]);
12082 /* Choose correct order to not overwrite the source before it is copied. */
12083 if ((REG_P (part[0][0])
12084 && REG_P (part[1][1])
12085 && (REGNO (part[0][0]) == REGNO (part[1][1])
12087 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12089 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy high-to-low: operands[2..4] receive destination parts and
   operands[5..7] source parts, consumed by the moves at the end.  */
12093 operands[2] = part[0][2];
12094 operands[3] = part[0][1];
12095 operands[4] = part[0][0];
12096 operands[5] = part[1][2];
12097 operands[6] = part[1][1];
12098 operands[7] = part[1][0];
12102 operands[2] = part[0][1];
12103 operands[3] = part[0][0];
12104 operands[5] = part[1][1];
12105 operands[6] = part[1][0];
/* Copy low-to-high (no overlap hazard).  */
12112 operands[2] = part[0][0];
12113 operands[3] = part[0][1];
12114 operands[4] = part[0][2];
12115 operands[5] = part[1][0];
12116 operands[6] = part[1][1];
12117 operands[7] = part[1][2];
12121 operands[2] = part[0][0];
12122 operands[3] = part[0][1];
12123 operands[5] = part[1][0];
12124 operands[6] = part[1][1];
12128 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12131 if (GET_CODE (operands[5]) == CONST_INT
12132 && operands[5] != const0_rtx
12133 && REG_P (operands[2]))
12135 if (GET_CODE (operands[6]) == CONST_INT
12136 && INTVAL (operands[6]) == INTVAL (operands[5]))
12137 operands[6] = operands[2];
12140 && GET_CODE (operands[7]) == CONST_INT
12141 && INTVAL (operands[7]) == INTVAL (operands[5]))
12142 operands[7] = operands[2];
12146 && GET_CODE (operands[6]) == CONST_INT
12147 && operands[6] != const0_rtx
12148 && REG_P (operands[3])
12149 && GET_CODE (operands[7]) == CONST_INT
12150 && INTVAL (operands[7]) == INTVAL (operands[6]))
12151 operands[7] = operands[3];
/* Finally emit the part moves in the order chosen above.  */
12154 emit_move_insn (operands[2], operands[5]);
12155 emit_move_insn (operands[3], operands[6]);
12157 emit_move_insn (operands[4], operands[7]);
12162 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12163 left shift by a constant, either using a single shift or
12164 a sequence of add instructions. */
12167 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* A shift by one is just an add of the operand to itself.  */
12171 emit_insn ((mode == DImode
12173 : gen_adddi3) (operand, operand, operand));
/* Use a sequence of COUNT adds when that is cheaper than one constant
   shift (per the tuning cost tables), unless optimizing for size.  */
12175 else if (!optimize_size
12176 && count * ix86_cost->add <= ix86_cost->shift_const)
12179 for (i=0; i<count; i++)
12181 emit_insn ((mode == DImode
12183 : gen_adddi3) (operand, operand, operand));
/* Otherwise emit a single shift-by-constant.  */
12187 emit_insn ((mode == DImode
12189 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into word-sized operations.  SCRATCH, when present with cmove,
   avoids the branchy variable-count adjustment.  */
12193 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12195 rtx low[2], high[2];
12197 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: emit the exact word moves and shifts.  */
12199 if (GET_CODE (operands[2]) == CONST_INT)
12201 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12202 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low word becomes zero, old low goes high.  */
12204 if (count >= single_width)
12206 emit_move_insn (high[0], low[1]);
12207 emit_move_insn (low[0], const0_rtx);
12209 if (count > single_width)
12210 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Count < word size: shld feeds low bits into the high word.  */
12214 if (!rtx_equal_p (operands[0], operands[1]))
12215 emit_move_insn (operands[0], operands[1]);
12216 emit_insn ((mode == DImode
12218 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12219 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
12224 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12226 if (operands[1] == const1_rtx)
12228 /* Assuming we've chosen a QImode capable registers, then 1 << N
12229 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12230 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12232 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12234 ix86_expand_clear (low[0]);
12235 ix86_expand_clear (high[0]);
12236 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12238 d = gen_lowpart (QImode, low[0]);
12239 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12240 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12241 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12243 d = gen_lowpart (QImode, high[0]);
12244 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12245 s = gen_rtx_NE (QImode, flags, const0_rtx);
12246 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12249 /* Otherwise, we can get the same results by manually performing
12250 a bit extract operation on bit 5/6, and then performing the two
12251 shifts. The two methods of getting 0/1 into low/high are exactly
12252 the same size. Avoiding the shift in the bit extract case helps
12253 pentium4 a bit; no one else seems to care much either way. */
12258 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12259 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12261 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12262 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12264 emit_insn ((mode == DImode
12266 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12267 emit_insn ((mode == DImode
12269 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12270 emit_move_insn (low[0], high[0]);
12271 emit_insn ((mode == DImode
12273 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12276 emit_insn ((mode == DImode
12278 : gen_ashldi3) (low[0], low[0], operands[2]));
12279 emit_insn ((mode == DImode
12281 : gen_ashldi3) (high[0], high[0], operands[2]));
12285 if (operands[1] == constm1_rtx)
12287 /* For -1 << N, we can avoid the shld instruction, because we
12288 know that we're shifting 0...31/63 ones into a -1. */
12289 emit_move_insn (low[0], constm1_rtx)
12291 emit_move_insn (high[0], low[0]);
12293 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld plus a low-word shift, then fix
   up the result when the count exceeded the word size.  */
12297 if (!rtx_equal_p (operands[0], operands[1]))
12298 emit_move_insn (operands[0], operands[1]);
12300 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12301 emit_insn ((mode == DImode
12303 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12306 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* With cmove and a scratch register the adjustment is branch-free.  */
12308 if (TARGET_CMOVE && scratch)
12310 ix86_expand_clear (scratch);
12311 emit_insn ((mode == DImode
12312 ? gen_x86_shift_adj_1
12313 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12316 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into word-sized
   operations; the sign of the high word must be propagated.  */
12320 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12322 rtx low[2], high[2];
12324 const int single_width = mode == DImode ? 32 : 64;
12326 if (GET_CODE (operands[2]) == CONST_INT)
12328 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12329 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by all-but-one bits: both words become the sign mask.  */
12331 if (count == single_width * 2 - 1)
12333 emit_move_insn (high[0], high[1]);
12334 emit_insn ((mode == DImode
12336 : gen_ashrdi3) (high[0], high[0],
12337 GEN_INT (single_width - 1)));
12338 emit_move_insn (low[0], high[0]);
/* Count >= word size: low gets the shifted high word, high gets the
   sign fill.  */
12341 else if (count >= single_width)
12343 emit_move_insn (low[0], high[1]);
12344 emit_move_insn (high[0], low[0]);
12345 emit_insn ((mode == DImode
12347 : gen_ashrdi3) (high[0], high[0],
12348 GEN_INT (single_width - 1)));
12349 if (count > single_width)
12350 emit_insn ((mode == DImode
12352 : gen_ashrdi3) (low[0], low[0],
12353 GEN_INT (count - single_width)));
/* Count < word size: shrd pulls high bits into the low word.  */
12357 if (!rtx_equal_p (operands[0], operands[1]))
12358 emit_move_insn (operands[0], operands[1]);
12359 emit_insn ((mode == DImode
12361 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12362 emit_insn ((mode == DImode
12364 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable shift count: shrd + ashr, then adjust for counts past the
   word size.  */
12369 if (!rtx_equal_p (operands[0], operands[1]))
12370 emit_move_insn (operands[0], operands[1]);
12372 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12374 emit_insn ((mode == DImode
12376 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12377 emit_insn ((mode == DImode
12379 : gen_ashrdi3) (high[0], high[0], operands[2]));
/* Branch-free adjustment needs the sign mask in SCRATCH.  */
12381 if (TARGET_CMOVE && scratch)
12383 emit_move_insn (scratch, high[0]);
12384 emit_insn ((mode == DImode
12386 : gen_ashrdi3) (scratch, scratch,
12387 GEN_INT (single_width - 1)));
12388 emit_insn ((mode == DImode
12389 ? gen_x86_shift_adj_1
12390 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12394 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into word-sized operations;
   vacated high bits are zero-filled.  */
12399 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12401 rtx low[2], high[2];
12403 const int single_width = mode == DImode ? 32 : 64;
12405 if (GET_CODE (operands[2]) == CONST_INT)
12407 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12408 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low gets the old high word, high is cleared.  */
12410 if (count >= single_width)
12412 emit_move_insn (low[0], high[1]);
12413 ix86_expand_clear (high[0]);
12415 if (count > single_width)
12416 emit_insn ((mode == DImode
12418 : gen_lshrdi3) (low[0], low[0],
12419 GEN_INT (count - single_width)));
/* Count < word size: shrd pulls high bits into the low word.  */
12423 if (!rtx_equal_p (operands[0], operands[1]))
12424 emit_move_insn (operands[0], operands[1]);
12425 emit_insn ((mode == DImode
12427 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12428 emit_insn ((mode == DImode
12430 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable shift count.  */
12435 if (!rtx_equal_p (operands[0], operands[1]))
12436 emit_move_insn (operands[0], operands[1]);
12438 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12440 emit_insn ((mode == DImode
12442 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12443 emit_insn ((mode == DImode
12445 : gen_lshrdi3) (high[0], high[0], operands[2]));
12447 /* Heh. By reversing the arguments, we can reuse this pattern. */
12448 if (TARGET_CMOVE && scratch)
12450 ix86_expand_clear (scratch);
12451 emit_insn ((mode == DImode
12452 ? gen_x86_shift_adj_1
12453 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12457 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12461 /* Helper function for the string operations below. Tests whether VARIABLE
12462 is aligned to VALUE bytes. If it is, jump to the label. */
12464 ix86_expand_aligntest (rtx variable, int value)
12466 rtx label = gen_label_rtx ();
12467 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Mask off the low bits: tmpcount = variable & value.  */
12468 if (GET_MODE (variable) == DImode)
12469 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12471 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
/* Jump to LABEL when the masked bits are zero, i.e. VARIABLE is
   aligned to VALUE bytes.  */
12472 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12477 /* Adjust COUNTER by the VALUE. */
12479 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
/* Subtract VALUE from COUNTREG, using the add pattern that matches the
   register's mode.  */
12481 if (GET_MODE (countreg) == DImode)
12482 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12484 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12487 /* Zero extend possibly SImode EXP to Pmode register. */
12489 ix86_zero_extend_to_Pmode (rtx exp)
/* Constants (VOIDmode) can simply be forced into a Pmode register.  */
12492 if (GET_MODE (exp) == VOIDmode)
12493 return force_reg (Pmode, exp);
/* Already Pmode: just copy into a fresh pseudo.  */
12494 if (GET_MODE (exp) == Pmode)
12495 return copy_to_mode_reg (Pmode, exp);
/* Otherwise EXP is SImode on a 64-bit target; zero-extend it.  */
12496 r = gen_reg_rtx (Pmode);
12497 emit_insn (gen_zero_extendsidi2 (r, exp));
12501 /* Expand string move (memcpy) operation. Use i386 string operations when
12502 profitable. expand_clrmem contains similar code. */
/* Expands a block move DST <- SRC of COUNT_EXP bytes with known alignment
   ALIGN_EXP.  Returns nonzero on success, zero to fall back to a library
   call (the early-return paths are elided in this extraction).  Three
   strategies are chosen between: rep movsb (size-optimized), rep movs{l,q}
   plus scalar tail for compile-time-constant counts, and a generic
   glibc-style sequence that aligns the destination at run time.  */
12504 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12506 rtx srcreg, destreg, countreg, srcexp, destexp;
12507 enum machine_mode counter_mode;
12508 HOST_WIDE_INT align = 0;
12509 unsigned HOST_WIDE_INT count = 0;
12511 if (GET_CODE (align_exp) == CONST_INT)
12512 align = INTVAL (align_exp);
12514 /* Can't use any of this if the user has appropriated esi or edi. */
/* Hard regs 4 and 5 are %esi/%edi, which rep movs implicitly uses.  */
12515 if (global_regs[4] || global_regs[5])
12518 /* This simple hack avoids all inlining code and simplifies code below. */
12519 if (!TARGET_ALIGN_STRINGOPS)
12522 if (GET_CODE (count_exp) == CONST_INT)
12524 count = INTVAL (count_exp);
12525 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12529 /* Figure out proper mode for counter. For 32bits it is always SImode,
12530 for 64bits use SImode when possible, otherwise DImode.
12531 Set count to number of bytes copied when known at compile time. */
12533 || GET_MODE (count_exp) == SImode
12534 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12535 counter_mode = SImode;
12537 counter_mode = DImode;
12539 gcc_assert (counter_mode == SImode || counter_mode == DImode);
/* Force both addresses into registers so rep_mov patterns can use them.  */
12541 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12542 if (destreg != XEXP (dst, 0))
12543 dst = replace_equiv_address_nv (dst, destreg);
12544 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12545 if (srcreg != XEXP (src, 0))
12546 src = replace_equiv_address_nv (src, srcreg);
12548 /* When optimizing for size emit simple rep ; movsb instruction for
12549 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12550 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12551 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12552 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12553 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12554 known to be zero or not. The rep; movsb sequence causes higher
12555 register pressure though, so take that into account. */
/* Strategy 1: plain rep movsb when optimizing for size.  */
12557 if ((!optimize || optimize_size)
12562 || (count & 3) + count / 4 > 6))))
12564 emit_insn (gen_cld ())
12565 countreg = ix86_zero_extend_to_Pmode (count_exp);
12566 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12567 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12568 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12572 /* For constant aligned (or small unaligned) copies use rep movsl
12573 followed by code copying the rest. For PentiumPro ensure 8 byte
12574 alignment to allow rep movsl acceleration. */
/* Strategy 2: compile-time-constant COUNT — word-sized rep move, then
   copy the <wordsize remainder with individual movs of shrinking width.  */
12576 else if (count != 0
12578 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12579 || optimize_size || count < (unsigned int) 64)
12581 unsigned HOST_WIDE_INT offset = 0;
12582 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12583 rtx srcmem, dstmem;
12585 emit_insn (gen_cld ());
12586 if (count & ~(size - 1))
/* Few words: emit unrolled single movs instead of a rep prefix.  */
12588 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12590 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12592 while (offset < (count & ~(size - 1)))
12594 srcmem = adjust_automodify_address_nv (src, movs_mode,
12596 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12598 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12604 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12605 & (TARGET_64BIT ? -1 : 0x3fffffff));
12606 countreg = copy_to_mode_reg (counter_mode, countreg);
12607 countreg = ix86_zero_extend_to_Pmode (countreg);
12609 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12610 GEN_INT (size == 4 ? 2 : 3));
12611 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12612 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12614 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12615 countreg, destexp, srcexp));
12616 offset = count & ~(size - 1);
/* Tail: at most one SImode (64-bit only), one HImode and one QImode move.  */
12619 if (size == 8 && (count & 0x04))
12621 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12623 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12625 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12630 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12632 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12634 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12639 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12641 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12643 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12646 /* The generic code based on the glibc implementation:
12647 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12648 allowing accelerated copying there)
12649 - copy the data using rep movsl
12650 - copy the rest. */
/* Strategy 3: run-time count — branchy alignment prologue, rep move of
   whole words, then conditional tail copies keyed off COUNTREG bits.  */
12655 rtx srcmem, dstmem;
12656 int desired_alignment = (TARGET_PENTIUMPRO
12657 && (count == 0 || count >= (unsigned int) 260)
12658 ? 8 : UNITS_PER_WORD);
12659 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12660 dst = change_address (dst, BLKmode, destreg);
12661 src = change_address (src, BLKmode, srcreg);
12663 /* In case we don't know anything about the alignment, default to
12664 library version, since it is usually equally fast and result in
12667 Also emit call when we know that the count is large and call overhead
12668 will not be important. */
12669 if (!TARGET_INLINE_ALL_STRINGOPS
12670 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12673 if (TARGET_SINGLE_STRINGOP)
12674 emit_insn (gen_cld ());
12676 countreg2 = gen_reg_rtx (Pmode);
12677 countreg = copy_to_mode_reg (counter_mode, count_exp);
12679 /* We don't use loops to align destination and to copy parts smaller
12680 than 4 bytes, because gcc is able to optimize such code better (in
12681 the case the destination or the count really is aligned, gcc is often
12682 able to predict the branches) and also it is friendlier to the
12683 hardware branch prediction.
12685 Using loops is beneficial for generic case, because we can
12686 handle small counts using the loops. Many CPUs (such as Athlon)
12687 have large REP prefix setup costs.
12689 This is quite costly. Maybe we can revisit this decision later or
12690 add some customizability to this code. */
/* Skip the alignment prologue entirely when the whole count is small.  */
12692 if (count == 0 && align < desired_alignment)
12694 label = gen_label_rtx ();
12695 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12696 LEU, 0, counter_mode, 1, label);
/* Align destination: conditionally move 1, 2, then 4 bytes.  */
12700 rtx label = ix86_expand_aligntest (destreg, 1);
12701 srcmem = change_address (src, QImode, srcreg);
12702 dstmem = change_address (dst, QImode, destreg);
12703 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12704 ix86_adjust_counter (countreg, 1);
12705 emit_label (label);
12706 LABEL_NUSES (label) = 1;
12710 rtx label = ix86_expand_aligntest (destreg, 2);
12711 srcmem = change_address (src, HImode, srcreg);
12712 dstmem = change_address (dst, HImode, destreg);
12713 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12714 ix86_adjust_counter (countreg, 2);
12715 emit_label (label);
12716 LABEL_NUSES (label) = 1;
12718 if (align <= 4 && desired_alignment > 4)
12720 rtx label = ix86_expand_aligntest (destreg, 4);
12721 srcmem = change_address (src, SImode, srcreg);
12722 dstmem = change_address (dst, SImode, destreg);
12723 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12724 ix86_adjust_counter (countreg, 4);
12725 emit_label (label);
12726 LABEL_NUSES (label) = 1;
12729 if (label && desired_alignment > 4 && !TARGET_64BIT)
12731 emit_label (label);
12732 LABEL_NUSES (label) = 1;
/* Main body: shift byte count down to a word count and rep-move.  */
12735 if (!TARGET_SINGLE_STRINGOP)
12736 emit_insn (gen_cld ());
12739 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12741 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12745 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12746 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12748 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12749 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12750 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12751 countreg2, destexp, srcexp));
12755 emit_label (label);
12756 LABEL_NUSES (label) = 1;
/* Tail copies: 4, 2 and 1 bytes, either unconditionally (constant count)
   or guarded by run-time tests of the corresponding COUNTREG bit.  */
12758 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12760 srcmem = change_address (src, SImode, srcreg);
12761 dstmem = change_address (dst, SImode, destreg);
12762 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12764 if ((align <= 4 || count == 0) && TARGET_64BIT)
12766 rtx label = ix86_expand_aligntest (countreg, 4);
12767 srcmem = change_address (src, SImode, srcreg);
12768 dstmem = change_address (dst, SImode, destreg);
12769 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12770 emit_label (label);
12771 LABEL_NUSES (label) = 1;
12773 if (align > 2 && count != 0 && (count & 2))
12775 srcmem = change_address (src, HImode, srcreg);
12776 dstmem = change_address (dst, HImode, destreg);
12777 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12779 if (align <= 2 || count == 0)
12781 rtx label = ix86_expand_aligntest (countreg, 2);
12782 srcmem = change_address (src, HImode, srcreg);
12783 dstmem = change_address (dst, HImode, destreg);
12784 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12785 emit_label (label);
12786 LABEL_NUSES (label) = 1;
12788 if (align > 1 && count != 0 && (count & 1))
12790 srcmem = change_address (src, QImode, srcreg);
12791 dstmem = change_address (dst, QImode, destreg);
12792 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12794 if (align <= 1 || count == 0)
12796 rtx label = ix86_expand_aligntest (countreg, 1);
12797 srcmem = change_address (src, QImode, srcreg);
12798 dstmem = change_address (dst, QImode, destreg);
12799 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12800 emit_label (label);
12801 LABEL_NUSES (label) = 1;
12808 /* Expand string clear operation (bzero). Use i386 string operations when
12809 profitable. expand_movmem contains similar code. */
/* Expands a block clear of COUNT_EXP bytes at DST with alignment
   ALIGN_EXP, mirroring ix86_expand_movmem's three strategies but using
   rep stos / strset with a zero register instead of moves.  Early
   fall-back returns are elided in this extraction.  */
12811 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12813 rtx destreg, zeroreg, countreg, destexp;
12814 enum machine_mode counter_mode;
12815 HOST_WIDE_INT align = 0;
12816 unsigned HOST_WIDE_INT count = 0;
12818 if (GET_CODE (align_exp) == CONST_INT)
12819 align = INTVAL (align_exp);
12821 /* Can't use any of this if the user has appropriated esi. */
/* NOTE(review): rep stos uses %edi; hard reg 4 here — comment above says
   esi — verify which register index 4 denotes in this backend.  */
12822 if (global_regs[4])
12825 /* This simple hack avoids all inlining code and simplifies code below. */
12826 if (!TARGET_ALIGN_STRINGOPS)
12829 if (GET_CODE (count_exp) == CONST_INT)
12831 count = INTVAL (count_exp);
12832 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12835 /* Figure out proper mode for counter. For 32bits it is always SImode,
12836 for 64bits use SImode when possible, otherwise DImode.
12837 Set count to number of bytes copied when known at compile time. */
12839 || GET_MODE (count_exp) == SImode
12840 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12841 counter_mode = SImode;
12843 counter_mode = DImode;
12845 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12846 if (destreg != XEXP (dst, 0))
12847 dst = replace_equiv_address_nv (dst, destreg);
12850 /* When optimizing for size emit simple rep ; movsb instruction for
12851 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12852 sequence is 7 bytes long, so if optimizing for size and count is
12853 small enough that some stosl, stosw and stosb instructions without
12854 rep are shorter, fall back into the next if. */
/* Strategy 1: rep stosb when optimizing for size.  */
12856 if ((!optimize || optimize_size)
12859 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12861 emit_insn (gen_cld ());
12863 countreg = ix86_zero_extend_to_Pmode (count_exp);
12864 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12865 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12866 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
/* Strategy 2: compile-time-constant COUNT — word stores (unrolled or
   rep stos) plus a <wordsize tail of narrower stores.  */
12868 else if (count != 0
12870 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12871 || optimize_size || count < (unsigned int) 64))
12873 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12874 unsigned HOST_WIDE_INT offset = 0;
12876 emit_insn (gen_cld ());
12878 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12879 if (count & ~(size - 1))
12881 unsigned HOST_WIDE_INT repcount;
12882 unsigned int max_nonrep;
12884 repcount = count >> (size == 4 ? 2 : 3);
12886 repcount &= 0x3fffffff;
12888 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12889 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12890 bytes. In both cases the latter seems to be faster for small
12892 max_nonrep = size == 4 ? 7 : 4;
12893 if (!optimize_size)
12896 case PROCESSOR_PENTIUM4:
12897 case PROCESSOR_NOCONA:
/* Small repcount: emit individual stos insns instead of a rep prefix.  */
12904 if (repcount <= max_nonrep)
12905 while (repcount-- > 0)
12907 rtx mem = adjust_automodify_address_nv (dst,
12908 GET_MODE (zeroreg),
12910 emit_insn (gen_strset (destreg, mem, zeroreg));
12915 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12916 countreg = ix86_zero_extend_to_Pmode (countreg);
12917 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12918 GEN_INT (size == 4 ? 2 : 3));
12919 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12920 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12922 offset = count & ~(size - 1);
/* Tail stores of 4 (64-bit only), 2 and 1 bytes via SUBREGs of zeroreg.  */
12925 if (size == 8 && (count & 0x04))
12927 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12929 emit_insn (gen_strset (destreg, mem,
12930 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12935 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12937 emit_insn (gen_strset (destreg, mem,
12938 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12943 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12945 emit_insn (gen_strset (destreg, mem,
12946 gen_rtx_SUBREG (QImode, zeroreg, 0)));
/* Strategy 3: run-time count — align destination, rep stos words,
   then conditional narrow tail stores.  */
12953 /* Compute desired alignment of the string operation. */
12954 int desired_alignment = (TARGET_PENTIUMPRO
12955 && (count == 0 || count >= (unsigned int) 260)
12956 ? 8 : UNITS_PER_WORD);
12958 /* In case we don't know anything about the alignment, default to
12959 library version, since it is usually equally fast and result in
12962 Also emit call when we know that the count is large and call overhead
12963 will not be important. */
12964 if (!TARGET_INLINE_ALL_STRINGOPS
12965 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12968 if (TARGET_SINGLE_STRINGOP)
12969 emit_insn (gen_cld ());
12971 countreg2 = gen_reg_rtx (Pmode);
12972 countreg = copy_to_mode_reg (counter_mode, count_exp);
12973 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12974 /* Get rid of MEM_OFFSET, it won't be accurate. */
12975 dst = change_address (dst, BLKmode, destreg);
12977 if (count == 0 && align < desired_alignment)
12979 label = gen_label_rtx ();
12980 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12981 LEU, 0, counter_mode, 1, label);
/* Alignment prologue: conditionally store 1, 2, then 4 bytes.  */
12985 rtx label = ix86_expand_aligntest (destreg, 1);
12986 emit_insn (gen_strset (destreg, dst,
12987 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12988 ix86_adjust_counter (countreg, 1);
12989 emit_label (label);
12990 LABEL_NUSES (label) = 1;
12994 rtx label = ix86_expand_aligntest (destreg, 2);
12995 emit_insn (gen_strset (destreg, dst,
12996 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12997 ix86_adjust_counter (countreg, 2);
12998 emit_label (label);
12999 LABEL_NUSES (label) = 1;
13001 if (align <= 4 && desired_alignment > 4)
13003 rtx label = ix86_expand_aligntest (destreg, 4);
13004 emit_insn (gen_strset (destreg, dst,
13006 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13008 ix86_adjust_counter (countreg, 4);
13009 emit_label (label);
13010 LABEL_NUSES (label) = 1;
13013 if (label && desired_alignment > 4 && !TARGET_64BIT)
13015 emit_label (label);
13016 LABEL_NUSES (label) = 1;
/* Main body: shift byte count to word count and rep stos.  */
13020 if (!TARGET_SINGLE_STRINGOP)
13021 emit_insn (gen_cld ());
13024 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13026 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13030 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13031 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13033 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13034 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13038 emit_label (label);
13039 LABEL_NUSES (label) = 1;
/* Tail: conditional 4-, 2- and 1-byte stores keyed off COUNTREG bits.  */
13042 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13043 emit_insn (gen_strset (destreg, dst,
13044 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13045 if (TARGET_64BIT && (align <= 4 || count == 0))
13047 rtx label = ix86_expand_aligntest (countreg, 4);
13048 emit_insn (gen_strset (destreg, dst,
13049 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13050 emit_label (label);
13051 LABEL_NUSES (label) = 1;
13053 if (align > 2 && count != 0 && (count & 2))
13054 emit_insn (gen_strset (destreg, dst,
13055 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13056 if (align <= 2 || count == 0)
13058 rtx label = ix86_expand_aligntest (countreg, 2);
13059 emit_insn (gen_strset (destreg, dst,
13060 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13061 emit_label (label);
13062 LABEL_NUSES (label) = 1;
13064 if (align > 1 && count != 0 && (count & 1))
13065 emit_insn (gen_strset (destreg, dst,
13066 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13067 if (align <= 1 || count == 0)
13069 rtx label = ix86_expand_aligntest (countreg, 1);
13070 emit_insn (gen_strset (destreg, dst,
13071 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13072 emit_label (label);
13073 LABEL_NUSES (label) = 1;
13079 /* Expand strlen. */
/* Expands strlen(SRC) into OUT.  Two code paths:
   1. Unrolled word-at-a-time scan (ix86_expand_strlensi_unroll_1) when
      scanning for NUL with little known alignment and optimizing;
      the helper returns the address of the terminator, so the length is
      that address minus the start.
   2. Otherwise repnz scasb via the strlenqi_1 pattern: scratch1 receives
      the decremented count, which is complemented and adjusted by -1 to
      yield the length.  */
13081 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13083 rtx addr, scratch1, scratch2, scratch3, scratch4;
13085 /* The generic case of strlen expander is long. Avoid it's
13086 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13088 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13089 && !TARGET_INLINE_ALL_STRINGOPS
13091 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13094 addr = force_reg (Pmode, XEXP (src, 0));
13095 scratch1 = gen_reg_rtx (Pmode);
13097 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13100 /* Well it seems that some optimizer does not combine a call like
13101 foo(strlen(bar), strlen(bar));
13102 when the move and the subtraction is done here. It does calculate
13103 the length just once when these instructions are done inside of
13104 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13105 often used and I use one fewer register for the lifetime of
13106 output_strlen_unroll() this is better. */
13108 emit_move_insn (out, addr);
13110 ix86_expand_strlensi_unroll_1 (out, src, align);
13112 /* strlensi_unroll_1 returns the address of the zero at the end of
13113 the string, like memchr(), so compute the length by subtracting
13114 the start address. */
13116 emit_insn (gen_subdi3 (out, out, addr));
13118 emit_insn (gen_subsi3 (out, out, addr));
/* repnz scasb path.  */
13123 scratch2 = gen_reg_rtx (Pmode);
13124 scratch3 = gen_reg_rtx (Pmode);
13125 scratch4 = force_reg (Pmode, constm1_rtx);
13127 emit_move_insn (scratch3, addr);
13128 eoschar = force_reg (QImode, eoschar);
13130 emit_insn (gen_cld ());
13131 src = replace_equiv_address_nv (src, scratch3);
13133 /* If .md starts supporting :P, this can be done in .md. */
13134 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13135 scratch4), UNSPEC_SCAS);
13136 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* length = ~count - 1 (count register was preloaded with -1).  */
13139 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13140 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13144 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13145 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13151 /* Expand the appropriate insns for doing strlen if not just doing
13154 out = result, initialized with the start address
13155 align_rtx = alignment of the address.
13156 scratch = scratch register, initialized with the startaddress when
13157 not aligned, otherwise undefined
13159 This is just the body. It needs the initializations mentioned above and
13160 some address computing at the end. These things are done in i386.md. */
/* Word-at-a-time NUL scan.  OUT holds the running address and on exit
   points at the terminating zero byte.  Phases: (1) byte-wise prologue
   that brings OUT up to 4-byte alignment, (2) a 4-byte loop using the
   classic (x - 0x01010101) & ~x & 0x80808080 zero-byte test, (3) an
   epilogue that locates the exact zero byte within the last word.  */
13163 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13167 rtx align_2_label = NULL_RTX;
13168 rtx align_3_label = NULL_RTX;
13169 rtx align_4_label = gen_label_rtx ();
13170 rtx end_0_label = gen_label_rtx ();
13172 rtx tmpreg = gen_reg_rtx (SImode);
13173 rtx scratch = gen_reg_rtx (SImode);
13177 if (GET_CODE (align_rtx) == CONST_INT)
13178 align = INTVAL (align_rtx);
13180 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13182 /* Is there a known alignment and is it less than 4? */
13185 rtx scratch1 = gen_reg_rtx (Pmode);
13186 emit_move_insn (scratch1, out);
13187 /* Is there a known alignment and is it not 2? */
13190 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13191 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13193 /* Leave just the 3 lower bits. */
13194 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13195 NULL_RTX, 0, OPTAB_WIDEN);
13197 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13198 Pmode, 1, align_4_label);
13199 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13200 Pmode, 1, align_2_label);
13201 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13202 Pmode, 1, align_3_label);
13206 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13207 check if is aligned to 4 - byte. */
13209 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13210 NULL_RTX, 0, OPTAB_WIDEN);
13212 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13213 Pmode, 1, align_4_label);
13216 mem = change_address (src, QImode, out);
13218 /* Now compare the bytes. */
13220 /* Compare the first n unaligned byte on a byte per byte basis. */
13221 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13222 QImode, 1, end_0_label);
13224 /* Increment the address. */
13226 emit_insn (gen_adddi3 (out, out, const1_rtx));
13228 emit_insn (gen_addsi3 (out, out, const1_rtx));
13230 /* Not needed with an alignment of 2 */
13233 emit_label (align_2_label);
13235 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13239 emit_insn (gen_adddi3 (out, out, const1_rtx));
13241 emit_insn (gen_addsi3 (out, out, const1_rtx));
13243 emit_label (align_3_label);
13246 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13250 emit_insn (gen_adddi3 (out, out, const1_rtx));
13252 emit_insn (gen_addsi3 (out, out, const1_rtx));
13255 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13256 align this loop. It gives only huge programs, but does not help to
13258 emit_label (align_4_label);
13260 mem = change_address (src, SImode, out);
13261 emit_move_insn (scratch, mem);
13263 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13265 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13267 /* This formula yields a nonzero result iff one of the bytes is zero.
13268 This saves three branches inside loop and many cycles. */
13270 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13271 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13272 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13273 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13274 gen_int_mode (0x80808080, SImode)));
13275 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Epilogue, branchless variant (cmov): pick upper/lower half of the
   zero-byte mask and bump OUT by 2 when the zero is in the upper half.  */
13280 rtx reg = gen_reg_rtx (SImode);
13281 rtx reg2 = gen_reg_rtx (Pmode);
13282 emit_move_insn (reg, tmpreg);
13283 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13285 /* If zero is not in the first two bytes, move two bytes forward. */
13286 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13287 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13288 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13289 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13290 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13293 /* Emit lea manually to avoid clobbering of flags. */
13294 emit_insn (gen_rtx_SET (SImode, reg2,
13295 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13297 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13298 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13299 emit_insn (gen_rtx_SET (VOIDmode, out,
13300 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Epilogue, branchy variant (no cmov).  */
13307 rtx end_2_label = gen_label_rtx ();
13308 /* Is zero in the first two bytes? */
13310 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13311 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13312 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13313 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13314 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13316 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13317 JUMP_LABEL (tmp) = end_2_label;
13319 /* Not in the first two. Move two bytes forward. */
13320 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13322 emit_insn (gen_adddi3 (out, out, const2_rtx));
13324 emit_insn (gen_addsi3 (out, out, const2_rtx));
13326 emit_label (end_2_label);
13330 /* Avoid branch in fixing the byte. */
/* add sets carry iff the zero byte is the second of the pair; subtract
   3 or 4 accordingly via subtract-with-borrow so OUT lands on the NUL.  */
13331 tmpreg = gen_lowpart (QImode, tmpreg);
13332 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13333 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13335 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13337 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13339 emit_label (end_0_label);
/* Expand a call.  RETVAL is the value register or NULL for a void call;
   FNADDR is a MEM containing the callee address; CALLARG1 is the argument
   byte count; CALLARG2 carries the SSE-register count for 64-bit varargs
   (moved into %al); POP is the callee-pop amount; SIBCALL is nonzero for
   a tail call.  Builds the CALL (or SET of RETVAL) rtx, wraps it with a
   stack-pointer adjustment PARALLEL when POP is set, and attaches the
   collected register USEs.  */
13343 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13344 rtx callarg2 ATTRIBUTE_UNUSED,
13345 rtx pop, int sibcall)
13347 rtx use = NULL, call;
13349 if (pop == const0_rtx)
13351 gcc_assert (!TARGET_64BIT || !pop);
/* Darwin/Mach-O: route symbolic calls through the indirection stub.  */
13353 if (TARGET_MACHO && !TARGET_64BIT)
13356 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13357 fnaddr = machopic_indirect_call_target (fnaddr);
13362 /* Static functions and indirect calls don't need the pic register. */
13363 if (! TARGET_64BIT && flag_pic
13364 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13365 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13366 use_reg (&use, pic_offset_table_rtx)
/* 64-bit varargs ABI: %al carries the number of SSE regs used.  */
13369 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13371 rtx al = gen_rtx_REG (QImode, 0);
13372 emit_move_insn (al, callarg2);
13373 use_reg (&use, al);
13376 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13378 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13379 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls to non-constant addresses must go through r11: it is
   call-clobbered but not used for argument passing.  */
13381 if (sibcall && TARGET_64BIT
13382 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13385 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13386 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13387 emit_move_insn (fnaddr, addr);
13388 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13391 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13393 call = gen_rtx_SET (VOIDmode, retval, call);
13396 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13397 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13398 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13401 call = emit_call_insn (call);
13403 CALL_INSN_FUNCTION_USAGE (call) = use;
13407 /* Clear stack slot assignments remembered from previous functions.
13408 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates and returns a zeroed per-function machine_function record,
   with the fast-prologue/epilogue register count marked as "not yet
   computed" (-1) and no TLS-descriptor call expanded.  */
13411 static struct machine_function *
13412 ix86_init_machine_status (void)
13414 struct machine_function *f;
13416 f = ggc_alloc_cleared (sizeof (struct machine_function));
13417 f->use_fast_prologue_epilogue_nregs = -1;
13418 f->tls_descriptor_call_expanded_p = 0;
13423 /* Return a MEM corresponding to a stack slot with mode MODE.
13424 Allocate a new slot if necessary.
13426 The RTL for a function can have several slots available: N is
13427 which slot to use. */
/* Slots are cached per (mode, n) pair on the ix86_stack_locals list so a
   second request for the same pair returns the same MEM.  */
13430 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13432 struct stack_local_entry *s;
13434 gcc_assert (n < MAX_386_STACK_LOCALS);
/* Linear search of previously allocated slots.  */
13436 for (s = ix86_stack_locals; s; s = s->next)
13437 if (s->mode == mode && s->n == n)
13440 s = (struct stack_local_entry *)
13441 ggc_alloc (sizeof (struct stack_local_entry));
13444 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Push the new entry onto the cache list.  */
13446 s->next = ix86_stack_locals;
13447 ix86_stack_locals = s;
13451 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Lazily built and cached in a GC root; GNU TLS variants use the
   three-underscore entry point, otherwise the standard __tls_get_addr.  */
13453 static GTY(()) rtx ix86_tls_symbol;
13455 ix86_tls_get_addr (void)
13458 if (!ix86_tls_symbol)
13460 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13461 (TARGET_ANY_GNU_TLS
13463 ? "___tls_get_addr"
13464 : "__tls_get_addr");
13467 return ix86_tls_symbol;
13470 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
/* Lazily built, cached in a GC root, and flagged as a global-dynamic TLS
   symbol so the TLS machinery treats it correctly.  */
13472 static GTY(()) rtx ix86_tls_module_base_symbol;
13474 ix86_tls_module_base (void)
13477 if (!ix86_tls_module_base_symbol)
13479 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13480 "_TLS_MODULE_BASE_");
13481 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13482 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13485 return ix86_tls_module_base_symbol;
13488 /* Calculate the length of the memory address in the instruction
13489 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Decomposes ADDR into base/index/disp and applies the x86 ModRM/SIB
   encoding rules: register-indirect is free unless the base is esp-like
   (needs SIB) or ebp-like (needs a displacement); a bare displacement
   costs 4 bytes; otherwise size depends on displacement width and whether
   an index/SIB byte is required.  */
13492 memory_address_length (rtx addr)
13494 struct ix86_address parts;
13495 rtx base, index, disp;
/* String-op auto-modified addresses take no address bytes.  */
13499 if (GET_CODE (addr) == PRE_DEC
13500 || GET_CODE (addr) == POST_INC
13501 || GET_CODE (addr) == PRE_MODIFY
13502 || GET_CODE (addr) == POST_MODIFY)
13505 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register checks below see the hard/pseudo reg.  */
13508 if (parts.base && GET_CODE (parts.base) == SUBREG)
13509 parts.base = SUBREG_REG (parts.base);
13510 if (parts.index && GET_CODE (parts.index) == SUBREG)
13511 parts.index = SUBREG_REG (parts.index);
13514 index = parts.index;
13519 - esp as the base always wants an index,
13520 - ebp as the base always wants a displacement. */
13522 /* Register Indirect. */
13523 if (base && !index && !disp)
13525 /* esp (for its index) and ebp (for its displacement) need
13526 the two-byte modrm form. */
13527 if (addr == stack_pointer_rtx
13528 || addr == arg_pointer_rtx
13529 || addr == frame_pointer_rtx
13530 || addr == hard_frame_pointer_rtx)
13534 /* Direct Addressing. */
13535 else if (disp && !base && !index)
13540 /* Find the length of the displacement constant. */
/* satisfies_constraint_K: displacement fits in a signed 8-bit byte.  */
13543 if (base && satisfies_constraint_K (disp))
13548 /* ebp always wants a displacement. */
13549 else if (base == hard_frame_pointer_rtx)
13552 /* An index requires the two-byte modrm form.... */
13554 /* ...like esp, which always wants an index. */
13555 || base == stack_pointer_rtx
13556 || base == arg_pointer_rtx
13557 || base == frame_pointer_rtx
13564 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13565 is set, expect that insn have 8bit immediate alternative. */
/* Scans INSN's operands for a constant; with SHORTFORM, an immediate that
   fits a signed byte (constraint K) encodes in 1 byte; otherwise the
   immediate width follows the insn's mode attribute.  */
13567 ix86_attr_length_immediate_default (rtx insn, int shortform)
13571 extract_insn_cached (insn);
13572 for (i = recog_data.n_operands - 1; i >= 0; --i)
13573 if (CONSTANT_P (recog_data.operand[i]))
13576 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13580 switch (get_attr_mode (insn))
13591 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13596 fatal_insn ("unknown insn mode", insn);
13602 /* Compute default value for "length_address" attribute. */
/* LEA encodes its address from the SET_SRC of its pattern; every other
   insn is scanned for its (at most one counted) MEM operand, whose
   address length memory_address_length computes.  */
13604 ix86_attr_length_address_default (rtx insn)
13608 if (get_attr_type (insn) == TYPE_LEA)
13610 rtx set = PATTERN (insn);
13612 if (GET_CODE (set) == PARALLEL)
13613 set = XVECEXP (set, 0, 0);
13615 gcc_assert (GET_CODE (set) == SET);
13617 return memory_address_length (SET_SRC (set));
13620 extract_insn_cached (insn);
13621 for (i = recog_data.n_operands - 1; i >= 0; --i)
13622 if (GET_CODE (recog_data.operand[i]) == MEM)
13624 return memory_address_length (XEXP (recog_data.operand[i], 0));
13630 /* Return the maximum number of instructions a cpu can issue. */
/* Per-processor issue width used by the scheduler; the return values for
   each case group are elided in this extraction.  */
13633 ix86_issue_rate (void)
13637 case PROCESSOR_PENTIUM:
13641 case PROCESSOR_PENTIUMPRO:
13642 case PROCESSOR_PENTIUM4:
13643 case PROCESSOR_ATHLON:
13645 case PROCESSOR_NOCONA:
13646 case PROCESSOR_GENERIC32:
13647 case PROCESSOR_GENERIC64:
13655 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13656 by DEP_INSN and nothing set by DEP_INSN. */
13659 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13663 /* Simplify the test for uninteresting insns. */
13664 if (insn_type != TYPE_SETCC
13665 && insn_type != TYPE_ICMOV
13666 && insn_type != TYPE_FCMOV
13667 && insn_type != TYPE_IBR)
13670 if ((set = single_set (dep_insn)) != 0)
13672 set = SET_DEST (set);
13675 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13676 && XVECLEN (PATTERN (dep_insn), 0) == 2
13677 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13678 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13680 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13681 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13686 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13689 /* This test is true if the dependent insn reads the flags but
13690 not any other potentially set register. */
13691 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13694 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13700 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13701 address with operands set by DEP_INSN. */
/* For an LEA the "address" is the SET_SRC of its pattern; for other insns
   it is the address of the (at most one considered) MEM operand.  The
   result is whether DEP_INSN modifies anything that address mentions
   (an address-generation-interlock hazard on Pentium).  */
13704 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13708 if (insn_type == TYPE_LEA
13711 addr = PATTERN (insn);
13713 if (GET_CODE (addr) == PARALLEL)
13714 addr = XVECEXP (addr, 0, 0);
13716 gcc_assert (GET_CODE (addr) == SET);
13718 addr = SET_SRC (addr);
13723 extract_insn_cached (insn);
13724 for (i = recog_data.n_operands - 1; i >= 0; --i)
13725 if (GET_CODE (recog_data.operand[i]) == MEM)
13727 addr = XEXP (recog_data.operand[i], 0);
13734 return modified_in_p (addr, dep_insn);
/* Scheduler hook (presumably TARGET_SCHED_ADJUST_COST): adjust the latency
   COST of the dependence LINK between DEP_INSN (producer) and INSN
   (consumer) for the tuning target.  NOTE(review): sampled fragment -- the
   switch head, cost adjustments, braces and the final return are not
   visible between the numbered lines; only comments have been added.  */
13738 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13740 enum attr_type insn_type, dep_insn_type;
13741 enum attr_memory memory;
13743 int dep_insn_code_number;
13745 /* Anti and output dependencies have zero cost on all CPUs. */
13746 if (REG_NOTE_KIND (link) != 0)
13749 dep_insn_code_number = recog_memoized (dep_insn);
13751 /* If we can't recognize the insns, we can't really do anything. */
13752 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13755 insn_type = get_attr_type (insn);
13756 dep_insn_type = get_attr_type (dep_insn);
13760 case PROCESSOR_PENTIUM:
13761 /* Address Generation Interlock adds a cycle of latency. */
13762 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13765 /* ??? Compares pair with jump/setcc. */
13766 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13769 /* Floating point stores require value to be ready one cycle earlier. */
13770 if (insn_type == TYPE_FMOV
13771 && get_attr_memory (insn) == MEMORY_STORE
13772 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13776 case PROCESSOR_PENTIUMPRO:
13777 memory = get_attr_memory (insn);
13779 /* INT->FP conversion is expensive. */
13780 if (get_attr_fp_int_src (dep_insn))
13783 /* There is one cycle extra latency between an FP op and a store. */
13784 if (insn_type == TYPE_FMOV
13785 && (set = single_set (dep_insn)) != NULL_RTX
13786 && (set2 = single_set (insn)) != NULL_RTX
13787 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13788 && GET_CODE (SET_DEST (set2)) == MEM)
13791 /* Show ability of reorder buffer to hide latency of load by executing
13792 in parallel with previous instruction in case
13793 previous instruction is not needed to compute the address. */
13794 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13795 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13797 /* Claim moves to take one cycle, as core can issue one load
13798 at time and the next load can start cycle later. */
13799 if (dep_insn_type == TYPE_IMOV
13800 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label for the following section (likely K6 or
   similar, per the push/pop esp heuristic) is not visible in this
   fragment.  */
13808 memory = get_attr_memory (insn);
13810 /* The esp dependency is resolved before the instruction is really
13812 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13813 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13816 /* INT->FP conversion is expensive. */
13817 if (get_attr_fp_int_src (dep_insn))
13820 /* Show ability of reorder buffer to hide latency of load by executing
13821 in parallel with previous instruction in case
13822 previous instruction is not needed to compute the address. */
13823 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13824 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13826 /* Claim moves to take one cycle, as core can issue one load
13827 at time and the next load can start cycle later. */
13828 if (dep_insn_type == TYPE_IMOV
13829 || dep_insn_type == TYPE_FMOV)
13838 case PROCESSOR_ATHLON:
13840 case PROCESSOR_GENERIC32:
13841 case PROCESSOR_GENERIC64:
13842 memory = get_attr_memory (insn);
13844 /* Show ability of reorder buffer to hide latency of load by executing
13845 in parallel with previous instruction in case
13846 previous instruction is not needed to compute the address. */
13847 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13848 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13850 enum attr_unit unit = get_attr_unit (insn);
13853 /* Because of the difference between the length of integer and
13854 floating unit pipeline preparation stages, the memory operands
13855 for floating point are cheaper.
13857 ??? For Athlon it the difference is most probably 2. */
13858 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13861 loadcost = TARGET_ATHLON ? 2 : 0;
13863 if (cost >= loadcost)
13876 /* How many alternative schedules to try. This should be as wide as the
13877 scheduling freedom in the DFA, but no wider. Making this value too
13878 large results extra work for the scheduler. */
/* NOTE(review): sampled fragment -- the returned lookahead depths for each
   tuning target (and the default return) are not visible here.  */
13881 ia32_multipass_dfa_lookahead (void)
13883 if (ix86_tune == PROCESSOR_PENTIUM)
13886 if (ix86_tune == PROCESSOR_PENTIUMPRO
13887 || ix86_tune == PROCESSOR_K6)
13895 /* Compute the alignment given to a constant that is being placed in memory.
13896 EXP is the constant and ALIGN is the alignment that the object would
13898 The value of this function is used instead of that alignment to align
/* NOTE(review): sampled fragment -- the specific alignment values returned
   inside the REAL_CST branch are not visible between the numbered lines.  */
13902 ix86_constant_alignment (tree exp, int align)
/* Double constants get at least 64-bit, 128-bit-mode constants at least
   128-bit alignment (exact returned values fall in the sampling gap).  */
13904 if (TREE_CODE (exp) == REAL_CST)
13906 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13908 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants are word-aligned (helps block-move code), but not
   when optimizing for size.  */
13911 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13912 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13913 return BITS_PER_WORD;
13918 /* Compute the alignment for a static variable.
13919 TYPE is the data type, and ALIGN is the alignment that
13920 the object would ordinarily have. The value of this function is used
13921 instead of that alignment to align the object. */
/* NOTE(review): sampled fragment -- return statements and braces between
   the numbered lines are not visible; only comments have been added.  */
13924 ix86_data_alignment (tree type, int align)
/* Cap the boost at a word when optimizing for size, otherwise allow up to
   256-bit alignment for large aggregates.  */
13926 int max_align = optimize_size ? BITS_PER_WORD : 256;
13928 if (AGGREGATE_TYPE_P (type)
13929 && TYPE_SIZE (type)
13930 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13931 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13932 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13933 && align < max_align)
13936 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13937 to 16byte boundary. */
13940 if (AGGREGATE_TYPE_P (type)
13941 && TYPE_SIZE (type)
13942 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13943 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13944 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class minimum alignments: arrays by element mode, complex
   types by their own mode, records/unions by the first field's mode,
   scalars by their mode.  */
13948 if (TREE_CODE (type) == ARRAY_TYPE)
13950 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13952 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13955 else if (TREE_CODE (type) == COMPLEX_TYPE)
13958 if (TYPE_MODE (type) == DCmode && align < 64)
13960 if (TYPE_MODE (type) == XCmode && align < 128)
13963 else if ((TREE_CODE (type) == RECORD_TYPE
13964 || TREE_CODE (type) == UNION_TYPE
13965 || TREE_CODE (type) == QUAL_UNION_TYPE)
13966 && TYPE_FIELDS (type))
13968 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13970 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13973 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13974 || TREE_CODE (type) == INTEGER_TYPE)
13976 if (TYPE_MODE (type) == DFmode && align < 64)
13978 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13985 /* Compute the alignment for a local variable.
13986 TYPE is the data type, and ALIGN is the alignment that
13987 the object would ordinarily have. The value of this macro is used
13988 instead of that alignment to align the object. */
/* NOTE(review): sampled fragment -- mirrors ix86_data_alignment but for
   stack locals; return statements between the numbered lines are not
   visible here.  */
13991 ix86_local_alignment (tree type, int align)
13993 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13994 to 16byte boundary. */
13997 if (AGGREGATE_TYPE_P (type)
13998 && TYPE_SIZE (type)
13999 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14000 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14001 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class minimum alignments, same scheme as the static-data
   hook above.  */
14004 if (TREE_CODE (type) == ARRAY_TYPE)
14006 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14008 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14011 else if (TREE_CODE (type) == COMPLEX_TYPE)
14013 if (TYPE_MODE (type) == DCmode && align < 64)
14015 if (TYPE_MODE (type) == XCmode && align < 128)
14018 else if ((TREE_CODE (type) == RECORD_TYPE
14019 || TREE_CODE (type) == UNION_TYPE
14020 || TREE_CODE (type) == QUAL_UNION_TYPE)
14021 && TYPE_FIELDS (type))
14023 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14025 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14028 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14029 || TREE_CODE (type) == INTEGER_TYPE)
14032 if (TYPE_MODE (type) == DFmode && align < 64)
14034 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14040 /* Emit RTL insns to initialize the variable parts of a trampoline.
14041 FNADDR is an RTX for the address of the function's pure code.
14042 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): sampled fragment -- the TARGET_64BIT branch structure,
   the OFFSET bookkeeping, and several emit calls fall in sampling gaps;
   only comments have been added.  */
14044 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit trampoline: "movl $cxt, %ecx" (0xb9 imm32) followed by
   "jmp rel32" (0xe9 disp32); DISP is the pc-relative displacement
   measured from the end of the 10-byte sequence.  */
14048 /* Compute offset from the end of the jmp to the target function. */
14049 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14050 plus_constant (tramp, 10),
14051 NULL_RTX, 1, OPTAB_DIRECT);
14052 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14053 gen_int_mode (0xb9, QImode));
14054 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14055 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14056 gen_int_mode (0xe9, QImode));
14057 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit trampoline: load FNADDR into r11 (movl 0x41 0xbb imm32 when the
   address zero-extends, else movabs 0x49 0xbb imm64).  */
14062 /* Try to load address using shorter movl instead of movabs.
14063 We may want to support movq for kernel mode, but kernel does not use
14064 trampolines at the moment. */
14065 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
/* NOTE(review): in context this copy_to_mode_reg looks like it belongs to
   the non-immediate (else) path, but the branch structure is sampled out
   here -- confirm against the full file.  */
14067 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14068 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14069 gen_int_mode (0xbb41, HImode));
14070 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14071 gen_lowpart (SImode, fnaddr));
14076 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14077 gen_int_mode (0xbb49, HImode));
14078 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14082 /* Load static chain using movabs to r10. */
14083 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14084 gen_int_mode (0xba49, HImode));
14085 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14088 /* Jump to the r11 */
14089 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14090 gen_int_mode (0xff49, HImode));
14091 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14092 gen_int_mode (0xe3, QImode));
14094 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets with non-executable stacks, ask libgcc to mark the
   trampoline's page executable.  */
14097 #ifdef ENABLE_EXECUTE_STACK
14098 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14099 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14103 /* Codes for all the SSE/MMX builtins. */
14106 IX86_BUILTIN_ADDPS,
14107 IX86_BUILTIN_ADDSS,
14108 IX86_BUILTIN_DIVPS,
14109 IX86_BUILTIN_DIVSS,
14110 IX86_BUILTIN_MULPS,
14111 IX86_BUILTIN_MULSS,
14112 IX86_BUILTIN_SUBPS,
14113 IX86_BUILTIN_SUBSS,
14115 IX86_BUILTIN_CMPEQPS,
14116 IX86_BUILTIN_CMPLTPS,
14117 IX86_BUILTIN_CMPLEPS,
14118 IX86_BUILTIN_CMPGTPS,
14119 IX86_BUILTIN_CMPGEPS,
14120 IX86_BUILTIN_CMPNEQPS,
14121 IX86_BUILTIN_CMPNLTPS,
14122 IX86_BUILTIN_CMPNLEPS,
14123 IX86_BUILTIN_CMPNGTPS,
14124 IX86_BUILTIN_CMPNGEPS,
14125 IX86_BUILTIN_CMPORDPS,
14126 IX86_BUILTIN_CMPUNORDPS,
14127 IX86_BUILTIN_CMPEQSS,
14128 IX86_BUILTIN_CMPLTSS,
14129 IX86_BUILTIN_CMPLESS,
14130 IX86_BUILTIN_CMPNEQSS,
14131 IX86_BUILTIN_CMPNLTSS,
14132 IX86_BUILTIN_CMPNLESS,
14133 IX86_BUILTIN_CMPNGTSS,
14134 IX86_BUILTIN_CMPNGESS,
14135 IX86_BUILTIN_CMPORDSS,
14136 IX86_BUILTIN_CMPUNORDSS,
14138 IX86_BUILTIN_COMIEQSS,
14139 IX86_BUILTIN_COMILTSS,
14140 IX86_BUILTIN_COMILESS,
14141 IX86_BUILTIN_COMIGTSS,
14142 IX86_BUILTIN_COMIGESS,
14143 IX86_BUILTIN_COMINEQSS,
14144 IX86_BUILTIN_UCOMIEQSS,
14145 IX86_BUILTIN_UCOMILTSS,
14146 IX86_BUILTIN_UCOMILESS,
14147 IX86_BUILTIN_UCOMIGTSS,
14148 IX86_BUILTIN_UCOMIGESS,
14149 IX86_BUILTIN_UCOMINEQSS,
14151 IX86_BUILTIN_CVTPI2PS,
14152 IX86_BUILTIN_CVTPS2PI,
14153 IX86_BUILTIN_CVTSI2SS,
14154 IX86_BUILTIN_CVTSI642SS,
14155 IX86_BUILTIN_CVTSS2SI,
14156 IX86_BUILTIN_CVTSS2SI64,
14157 IX86_BUILTIN_CVTTPS2PI,
14158 IX86_BUILTIN_CVTTSS2SI,
14159 IX86_BUILTIN_CVTTSS2SI64,
14161 IX86_BUILTIN_MAXPS,
14162 IX86_BUILTIN_MAXSS,
14163 IX86_BUILTIN_MINPS,
14164 IX86_BUILTIN_MINSS,
14166 IX86_BUILTIN_LOADUPS,
14167 IX86_BUILTIN_STOREUPS,
14168 IX86_BUILTIN_MOVSS,
14170 IX86_BUILTIN_MOVHLPS,
14171 IX86_BUILTIN_MOVLHPS,
14172 IX86_BUILTIN_LOADHPS,
14173 IX86_BUILTIN_LOADLPS,
14174 IX86_BUILTIN_STOREHPS,
14175 IX86_BUILTIN_STORELPS,
14177 IX86_BUILTIN_MASKMOVQ,
14178 IX86_BUILTIN_MOVMSKPS,
14179 IX86_BUILTIN_PMOVMSKB,
14181 IX86_BUILTIN_MOVNTPS,
14182 IX86_BUILTIN_MOVNTQ,
14184 IX86_BUILTIN_LOADDQU,
14185 IX86_BUILTIN_STOREDQU,
14187 IX86_BUILTIN_PACKSSWB,
14188 IX86_BUILTIN_PACKSSDW,
14189 IX86_BUILTIN_PACKUSWB,
14191 IX86_BUILTIN_PADDB,
14192 IX86_BUILTIN_PADDW,
14193 IX86_BUILTIN_PADDD,
14194 IX86_BUILTIN_PADDQ,
14195 IX86_BUILTIN_PADDSB,
14196 IX86_BUILTIN_PADDSW,
14197 IX86_BUILTIN_PADDUSB,
14198 IX86_BUILTIN_PADDUSW,
14199 IX86_BUILTIN_PSUBB,
14200 IX86_BUILTIN_PSUBW,
14201 IX86_BUILTIN_PSUBD,
14202 IX86_BUILTIN_PSUBQ,
14203 IX86_BUILTIN_PSUBSB,
14204 IX86_BUILTIN_PSUBSW,
14205 IX86_BUILTIN_PSUBUSB,
14206 IX86_BUILTIN_PSUBUSW,
14209 IX86_BUILTIN_PANDN,
14213 IX86_BUILTIN_PAVGB,
14214 IX86_BUILTIN_PAVGW,
14216 IX86_BUILTIN_PCMPEQB,
14217 IX86_BUILTIN_PCMPEQW,
14218 IX86_BUILTIN_PCMPEQD,
14219 IX86_BUILTIN_PCMPGTB,
14220 IX86_BUILTIN_PCMPGTW,
14221 IX86_BUILTIN_PCMPGTD,
14223 IX86_BUILTIN_PMADDWD,
14225 IX86_BUILTIN_PMAXSW,
14226 IX86_BUILTIN_PMAXUB,
14227 IX86_BUILTIN_PMINSW,
14228 IX86_BUILTIN_PMINUB,
14230 IX86_BUILTIN_PMULHUW,
14231 IX86_BUILTIN_PMULHW,
14232 IX86_BUILTIN_PMULLW,
14234 IX86_BUILTIN_PSADBW,
14235 IX86_BUILTIN_PSHUFW,
14237 IX86_BUILTIN_PSLLW,
14238 IX86_BUILTIN_PSLLD,
14239 IX86_BUILTIN_PSLLQ,
14240 IX86_BUILTIN_PSRAW,
14241 IX86_BUILTIN_PSRAD,
14242 IX86_BUILTIN_PSRLW,
14243 IX86_BUILTIN_PSRLD,
14244 IX86_BUILTIN_PSRLQ,
14245 IX86_BUILTIN_PSLLWI,
14246 IX86_BUILTIN_PSLLDI,
14247 IX86_BUILTIN_PSLLQI,
14248 IX86_BUILTIN_PSRAWI,
14249 IX86_BUILTIN_PSRADI,
14250 IX86_BUILTIN_PSRLWI,
14251 IX86_BUILTIN_PSRLDI,
14252 IX86_BUILTIN_PSRLQI,
14254 IX86_BUILTIN_PUNPCKHBW,
14255 IX86_BUILTIN_PUNPCKHWD,
14256 IX86_BUILTIN_PUNPCKHDQ,
14257 IX86_BUILTIN_PUNPCKLBW,
14258 IX86_BUILTIN_PUNPCKLWD,
14259 IX86_BUILTIN_PUNPCKLDQ,
14261 IX86_BUILTIN_SHUFPS,
14263 IX86_BUILTIN_RCPPS,
14264 IX86_BUILTIN_RCPSS,
14265 IX86_BUILTIN_RSQRTPS,
14266 IX86_BUILTIN_RSQRTSS,
14267 IX86_BUILTIN_SQRTPS,
14268 IX86_BUILTIN_SQRTSS,
14270 IX86_BUILTIN_UNPCKHPS,
14271 IX86_BUILTIN_UNPCKLPS,
14273 IX86_BUILTIN_ANDPS,
14274 IX86_BUILTIN_ANDNPS,
14276 IX86_BUILTIN_XORPS,
14279 IX86_BUILTIN_LDMXCSR,
14280 IX86_BUILTIN_STMXCSR,
14281 IX86_BUILTIN_SFENCE,
14283 /* 3DNow! Original */
14284 IX86_BUILTIN_FEMMS,
14285 IX86_BUILTIN_PAVGUSB,
14286 IX86_BUILTIN_PF2ID,
14287 IX86_BUILTIN_PFACC,
14288 IX86_BUILTIN_PFADD,
14289 IX86_BUILTIN_PFCMPEQ,
14290 IX86_BUILTIN_PFCMPGE,
14291 IX86_BUILTIN_PFCMPGT,
14292 IX86_BUILTIN_PFMAX,
14293 IX86_BUILTIN_PFMIN,
14294 IX86_BUILTIN_PFMUL,
14295 IX86_BUILTIN_PFRCP,
14296 IX86_BUILTIN_PFRCPIT1,
14297 IX86_BUILTIN_PFRCPIT2,
14298 IX86_BUILTIN_PFRSQIT1,
14299 IX86_BUILTIN_PFRSQRT,
14300 IX86_BUILTIN_PFSUB,
14301 IX86_BUILTIN_PFSUBR,
14302 IX86_BUILTIN_PI2FD,
14303 IX86_BUILTIN_PMULHRW,
14305 /* 3DNow! Athlon Extensions */
14306 IX86_BUILTIN_PF2IW,
14307 IX86_BUILTIN_PFNACC,
14308 IX86_BUILTIN_PFPNACC,
14309 IX86_BUILTIN_PI2FW,
14310 IX86_BUILTIN_PSWAPDSI,
14311 IX86_BUILTIN_PSWAPDSF,
14314 IX86_BUILTIN_ADDPD,
14315 IX86_BUILTIN_ADDSD,
14316 IX86_BUILTIN_DIVPD,
14317 IX86_BUILTIN_DIVSD,
14318 IX86_BUILTIN_MULPD,
14319 IX86_BUILTIN_MULSD,
14320 IX86_BUILTIN_SUBPD,
14321 IX86_BUILTIN_SUBSD,
14323 IX86_BUILTIN_CMPEQPD,
14324 IX86_BUILTIN_CMPLTPD,
14325 IX86_BUILTIN_CMPLEPD,
14326 IX86_BUILTIN_CMPGTPD,
14327 IX86_BUILTIN_CMPGEPD,
14328 IX86_BUILTIN_CMPNEQPD,
14329 IX86_BUILTIN_CMPNLTPD,
14330 IX86_BUILTIN_CMPNLEPD,
14331 IX86_BUILTIN_CMPNGTPD,
14332 IX86_BUILTIN_CMPNGEPD,
14333 IX86_BUILTIN_CMPORDPD,
14334 IX86_BUILTIN_CMPUNORDPD,
14335 IX86_BUILTIN_CMPNEPD,
14336 IX86_BUILTIN_CMPEQSD,
14337 IX86_BUILTIN_CMPLTSD,
14338 IX86_BUILTIN_CMPLESD,
14339 IX86_BUILTIN_CMPNEQSD,
14340 IX86_BUILTIN_CMPNLTSD,
14341 IX86_BUILTIN_CMPNLESD,
14342 IX86_BUILTIN_CMPORDSD,
14343 IX86_BUILTIN_CMPUNORDSD,
14344 IX86_BUILTIN_CMPNESD,
14346 IX86_BUILTIN_COMIEQSD,
14347 IX86_BUILTIN_COMILTSD,
14348 IX86_BUILTIN_COMILESD,
14349 IX86_BUILTIN_COMIGTSD,
14350 IX86_BUILTIN_COMIGESD,
14351 IX86_BUILTIN_COMINEQSD,
14352 IX86_BUILTIN_UCOMIEQSD,
14353 IX86_BUILTIN_UCOMILTSD,
14354 IX86_BUILTIN_UCOMILESD,
14355 IX86_BUILTIN_UCOMIGTSD,
14356 IX86_BUILTIN_UCOMIGESD,
14357 IX86_BUILTIN_UCOMINEQSD,
14359 IX86_BUILTIN_MAXPD,
14360 IX86_BUILTIN_MAXSD,
14361 IX86_BUILTIN_MINPD,
14362 IX86_BUILTIN_MINSD,
14364 IX86_BUILTIN_ANDPD,
14365 IX86_BUILTIN_ANDNPD,
14367 IX86_BUILTIN_XORPD,
14369 IX86_BUILTIN_SQRTPD,
14370 IX86_BUILTIN_SQRTSD,
14372 IX86_BUILTIN_UNPCKHPD,
14373 IX86_BUILTIN_UNPCKLPD,
14375 IX86_BUILTIN_SHUFPD,
14377 IX86_BUILTIN_LOADUPD,
14378 IX86_BUILTIN_STOREUPD,
14379 IX86_BUILTIN_MOVSD,
14381 IX86_BUILTIN_LOADHPD,
14382 IX86_BUILTIN_LOADLPD,
14384 IX86_BUILTIN_CVTDQ2PD,
14385 IX86_BUILTIN_CVTDQ2PS,
14387 IX86_BUILTIN_CVTPD2DQ,
14388 IX86_BUILTIN_CVTPD2PI,
14389 IX86_BUILTIN_CVTPD2PS,
14390 IX86_BUILTIN_CVTTPD2DQ,
14391 IX86_BUILTIN_CVTTPD2PI,
14393 IX86_BUILTIN_CVTPI2PD,
14394 IX86_BUILTIN_CVTSI2SD,
14395 IX86_BUILTIN_CVTSI642SD,
14397 IX86_BUILTIN_CVTSD2SI,
14398 IX86_BUILTIN_CVTSD2SI64,
14399 IX86_BUILTIN_CVTSD2SS,
14400 IX86_BUILTIN_CVTSS2SD,
14401 IX86_BUILTIN_CVTTSD2SI,
14402 IX86_BUILTIN_CVTTSD2SI64,
14404 IX86_BUILTIN_CVTPS2DQ,
14405 IX86_BUILTIN_CVTPS2PD,
14406 IX86_BUILTIN_CVTTPS2DQ,
14408 IX86_BUILTIN_MOVNTI,
14409 IX86_BUILTIN_MOVNTPD,
14410 IX86_BUILTIN_MOVNTDQ,
14413 IX86_BUILTIN_MASKMOVDQU,
14414 IX86_BUILTIN_MOVMSKPD,
14415 IX86_BUILTIN_PMOVMSKB128,
14417 IX86_BUILTIN_PACKSSWB128,
14418 IX86_BUILTIN_PACKSSDW128,
14419 IX86_BUILTIN_PACKUSWB128,
14421 IX86_BUILTIN_PADDB128,
14422 IX86_BUILTIN_PADDW128,
14423 IX86_BUILTIN_PADDD128,
14424 IX86_BUILTIN_PADDQ128,
14425 IX86_BUILTIN_PADDSB128,
14426 IX86_BUILTIN_PADDSW128,
14427 IX86_BUILTIN_PADDUSB128,
14428 IX86_BUILTIN_PADDUSW128,
14429 IX86_BUILTIN_PSUBB128,
14430 IX86_BUILTIN_PSUBW128,
14431 IX86_BUILTIN_PSUBD128,
14432 IX86_BUILTIN_PSUBQ128,
14433 IX86_BUILTIN_PSUBSB128,
14434 IX86_BUILTIN_PSUBSW128,
14435 IX86_BUILTIN_PSUBUSB128,
14436 IX86_BUILTIN_PSUBUSW128,
14438 IX86_BUILTIN_PAND128,
14439 IX86_BUILTIN_PANDN128,
14440 IX86_BUILTIN_POR128,
14441 IX86_BUILTIN_PXOR128,
14443 IX86_BUILTIN_PAVGB128,
14444 IX86_BUILTIN_PAVGW128,
14446 IX86_BUILTIN_PCMPEQB128,
14447 IX86_BUILTIN_PCMPEQW128,
14448 IX86_BUILTIN_PCMPEQD128,
14449 IX86_BUILTIN_PCMPGTB128,
14450 IX86_BUILTIN_PCMPGTW128,
14451 IX86_BUILTIN_PCMPGTD128,
14453 IX86_BUILTIN_PMADDWD128,
14455 IX86_BUILTIN_PMAXSW128,
14456 IX86_BUILTIN_PMAXUB128,
14457 IX86_BUILTIN_PMINSW128,
14458 IX86_BUILTIN_PMINUB128,
14460 IX86_BUILTIN_PMULUDQ,
14461 IX86_BUILTIN_PMULUDQ128,
14462 IX86_BUILTIN_PMULHUW128,
14463 IX86_BUILTIN_PMULHW128,
14464 IX86_BUILTIN_PMULLW128,
14466 IX86_BUILTIN_PSADBW128,
14467 IX86_BUILTIN_PSHUFHW,
14468 IX86_BUILTIN_PSHUFLW,
14469 IX86_BUILTIN_PSHUFD,
14471 IX86_BUILTIN_PSLLW128,
14472 IX86_BUILTIN_PSLLD128,
14473 IX86_BUILTIN_PSLLQ128,
14474 IX86_BUILTIN_PSRAW128,
14475 IX86_BUILTIN_PSRAD128,
14476 IX86_BUILTIN_PSRLW128,
14477 IX86_BUILTIN_PSRLD128,
14478 IX86_BUILTIN_PSRLQ128,
14479 IX86_BUILTIN_PSLLDQI128,
14480 IX86_BUILTIN_PSLLWI128,
14481 IX86_BUILTIN_PSLLDI128,
14482 IX86_BUILTIN_PSLLQI128,
14483 IX86_BUILTIN_PSRAWI128,
14484 IX86_BUILTIN_PSRADI128,
14485 IX86_BUILTIN_PSRLDQI128,
14486 IX86_BUILTIN_PSRLWI128,
14487 IX86_BUILTIN_PSRLDI128,
14488 IX86_BUILTIN_PSRLQI128,
14490 IX86_BUILTIN_PUNPCKHBW128,
14491 IX86_BUILTIN_PUNPCKHWD128,
14492 IX86_BUILTIN_PUNPCKHDQ128,
14493 IX86_BUILTIN_PUNPCKHQDQ128,
14494 IX86_BUILTIN_PUNPCKLBW128,
14495 IX86_BUILTIN_PUNPCKLWD128,
14496 IX86_BUILTIN_PUNPCKLDQ128,
14497 IX86_BUILTIN_PUNPCKLQDQ128,
14499 IX86_BUILTIN_CLFLUSH,
14500 IX86_BUILTIN_MFENCE,
14501 IX86_BUILTIN_LFENCE,
14503 /* Prescott New Instructions. */
14504 IX86_BUILTIN_ADDSUBPS,
14505 IX86_BUILTIN_HADDPS,
14506 IX86_BUILTIN_HSUBPS,
14507 IX86_BUILTIN_MOVSHDUP,
14508 IX86_BUILTIN_MOVSLDUP,
14509 IX86_BUILTIN_ADDSUBPD,
14510 IX86_BUILTIN_HADDPD,
14511 IX86_BUILTIN_HSUBPD,
14512 IX86_BUILTIN_LDDQU,
14514 IX86_BUILTIN_MONITOR,
14515 IX86_BUILTIN_MWAIT,
14517 IX86_BUILTIN_VEC_INIT_V2SI,
14518 IX86_BUILTIN_VEC_INIT_V4HI,
14519 IX86_BUILTIN_VEC_INIT_V8QI,
14520 IX86_BUILTIN_VEC_EXT_V2DF,
14521 IX86_BUILTIN_VEC_EXT_V2DI,
14522 IX86_BUILTIN_VEC_EXT_V4SF,
14523 IX86_BUILTIN_VEC_EXT_V4SI,
14524 IX86_BUILTIN_VEC_EXT_V8HI,
14525 IX86_BUILTIN_VEC_EXT_V2SI,
14526 IX86_BUILTIN_VEC_EXT_V4HI,
14527 IX86_BUILTIN_VEC_SET_V8HI,
14528 IX86_BUILTIN_VEC_SET_V4HI,
/* Register builtin NAME (of function type TYPE, enum code CODE) with the
   front end, but only when the MASK target flags are enabled and, for
   64-bit-only builtins, when compiling for 64-bit.  NOTE(review): the
   do/while wrapper lines of this macro are sampled out in this fragment.  */
14533 #define def_builtin(MASK, NAME, TYPE, CODE) \
14535 if ((MASK) & target_flags \
14536 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14537 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14538 NULL, NULL_TREE); \
14541 /* Bits for builtin_description.flag. */
14543 /* Set when we don't support the comparison natively, and should
14544 swap_comparison in order to support it. */
14545 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One table row per builtin: enabling target-flag mask, insn pattern,
   user-visible name (0 when registered elsewhere), builtin enum code,
   rtx comparison code where applicable, and flag bits (see above).
   NOTE(review): the struct's braces are sampled out in this fragment.  */
14547 struct builtin_description
14549 const unsigned int mask;
14550 const enum insn_code icode;
14551 const char *const name;
14552 const enum ix86_builtins code;
14553 const enum rtx_code comparison;
14554 const unsigned int flag;
14557 static const struct builtin_description bdesc_comi[] =
14559 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14560 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14561 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14562 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14563 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14564 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14565 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14566 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14567 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14568 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14569 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14570 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14571 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14572 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14573 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14574 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14575 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14576 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14577 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14578 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14579 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14580 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14581 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14582 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14585 static const struct builtin_description bdesc_2arg[] =
14588 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14589 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14590 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14591 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14592 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14593 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14594 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14595 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14597 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14598 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14599 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14600 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14601 BUILTIN_DESC_SWAP_OPERANDS },
14602 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14603 BUILTIN_DESC_SWAP_OPERANDS },
14604 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14605 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14606 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14607 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14608 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14609 BUILTIN_DESC_SWAP_OPERANDS },
14610 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14611 BUILTIN_DESC_SWAP_OPERANDS },
14612 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14613 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14614 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14615 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14616 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14617 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14618 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14619 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14620 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14621 BUILTIN_DESC_SWAP_OPERANDS },
14622 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14623 BUILTIN_DESC_SWAP_OPERANDS },
14624 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14626 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14627 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14628 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14629 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14631 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14632 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14633 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14634 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14636 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14637 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14638 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14639 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14640 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14643 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14644 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14645 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14646 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14647 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14648 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14649 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14650 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14652 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14653 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14654 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14655 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14656 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14657 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14658 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14659 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
/* MMX 16-bit multiplies; the unsigned high-part form requires SSE or 3DNow!A. */
14661 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14662 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14663 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
/* 64-bit MMX bitwise logic. */
14665 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14666 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14667 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14668 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
/* Unsigned byte/word averages (SSE integer extensions to MMX). */
14670 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14671 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
/* MMX element-wise equality and signed greater-than compares. */
14673 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14674 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14675 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14676 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14677 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14678 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
/* Unsigned byte and signed word min/max (SSE integer extensions to MMX). */
14680 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14681 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14682 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14683 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
/* MMX interleave (unpack) high/low halves. */
14685 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14686 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14687 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14688 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14689 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14690 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
/* Entries with a null name are not registered from this loop; they are
   defined by hand later with bespoke function types.  */
14693 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14694 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14695 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
/* Integer -> single-float conversions. */
14697 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14698 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14699 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
/* MMX shifts: register-count and immediate-count builtins share one pattern. */
14701 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14702 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14703 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14704 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14705 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14706 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14708 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14709 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14710 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14711 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14712 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14713 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
/* Arithmetic right shifts exist only for 16- and 32-bit elements. */
14715 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14716 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14717 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14718 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
/* Sum-of-absolute-differences and multiply-add. */
14720 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14721 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
/* SSE2 packed and scalar double-precision arithmetic. */
14724 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14725 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14726 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14727 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14728 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14729 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14730 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14731 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
/* SSE2 double compares.  GT/GE have no direct pattern; they reuse LT/LE
   with BUILTIN_DESC_SWAP_OPERANDS.  The fifth field is the comparison code.  */
14733 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14734 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14735 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14736 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14737 BUILTIN_DESC_SWAP_OPERANDS },
14738 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14739 BUILTIN_DESC_SWAP_OPERANDS },
14740 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14741 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14742 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14743 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14744 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14745 BUILTIN_DESC_SWAP_OPERANDS },
14746 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14747 BUILTIN_DESC_SWAP_OPERANDS },
14748 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
/* Scalar (sd) variants of the compares.  Note there are no swapped-operand
   ngt/nge scalar entries in this table.  */
14749 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14750 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14751 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14752 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14753 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14754 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14755 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14756 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
/* SSE2 double min/max, packed and scalar. */
14758 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14759 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14760 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14761 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
/* SSE2 bitwise ops on double vectors. */
14763 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14764 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14765 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14766 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
/* SSE2 double moves and unpacks. */
14768 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14769 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14770 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
/* SSE2 128-bit integer add/subtract. */
14773 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14774 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14775 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14776 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14777 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14778 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14779 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14780 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
/* Saturating 128-bit add/sub.  NOTE(review): these use MASK_MMX although
   the patterns are sse2_* 128-bit ops; looks like it should be MASK_SSE2.
   Kept as-is — confirm against upstream before changing.  */
14782 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14783 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14784 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14785 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14786 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14787 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14788 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14789 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
/* SSE2 16-bit multiplies. */
14791 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14792 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
/* SSE2 128-bit bitwise logic. */
14794 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14795 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14796 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14797 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
/* SSE2 unsigned averages. */
14799 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14800 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
/* SSE2 integer compares. */
14802 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14805 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14806 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14807 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
/* SSE2 integer min/max. */
14809 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14810 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14811 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14812 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
/* SSE2 interleave (unpack) high/low. */
14814 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14815 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14816 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14817 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14818 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14819 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14820 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14821 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
/* SSE2 pack-with-saturation. */
14823 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14824 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14825 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
/* Unsigned high-part multiply and SAD; psadbw128 gets its type by hand. */
14827 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14828 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
/* Widening unsigned 32x32->64 multiplies. */
14830 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14831 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
/* SSE2 immediate-count shifts. */
14833 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14834 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14835 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14837 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14838 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14839 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14841 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14842 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14844 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
/* SSE2 scalar conversions. */
14846 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14847 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14848 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14849 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
/* SSE3 horizontal/interleaved arithmetic. */
14852 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14853 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14854 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14855 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14856 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14857 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
/* Builtins taking one vector operand.  Entries with a null name are
   registered by hand with bespoke function types; the rest get their
   type chosen by the registration loop from the insn's operand modes.  */
14860 static const struct builtin_description bdesc_1arg[] =
/* SSE move-mask extractions. */
14862 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14863 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
/* SSE reciprocal/square-root approximations. */
14865 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14866 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14867 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
/* SSE float -> integer conversions (truncating "tt" forms round toward zero). */
14869 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14870 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14871 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14872 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14873 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14874 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
/* SSE2 move-mask extractions and sqrt. */
14876 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14877 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14879 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
/* SSE2 integer <-> float/double conversions. */
14881 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14882 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14884 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14885 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14886 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14887 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14888 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14890 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
/* SSE2 scalar double -> integer conversions; "q" forms are 64-bit-only. */
14892 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14893 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14894 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14895 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14897 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14898 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14899 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
/* SSE3 duplicating moves. */
14902 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14903 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14907 ix86_init_builtins (void)
14910 ix86_init_mmx_sse_builtins ();
14913 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14914 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14917 ix86_init_mmx_sse_builtins (void)
14919 const struct builtin_description * d;
14922 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14923 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14924 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14925 tree V2DI_type_node
14926 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14927 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14928 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14929 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14930 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14931 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14932 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14934 tree pchar_type_node = build_pointer_type (char_type_node);
14935 tree pcchar_type_node = build_pointer_type (
14936 build_type_variant (char_type_node, 1, 0));
14937 tree pfloat_type_node = build_pointer_type (float_type_node);
14938 tree pcfloat_type_node = build_pointer_type (
14939 build_type_variant (float_type_node, 1, 0));
14940 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14941 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14942 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14945 tree int_ftype_v4sf_v4sf
14946 = build_function_type_list (integer_type_node,
14947 V4SF_type_node, V4SF_type_node, NULL_TREE);
14948 tree v4si_ftype_v4sf_v4sf
14949 = build_function_type_list (V4SI_type_node,
14950 V4SF_type_node, V4SF_type_node, NULL_TREE);
14951 /* MMX/SSE/integer conversions. */
14952 tree int_ftype_v4sf
14953 = build_function_type_list (integer_type_node,
14954 V4SF_type_node, NULL_TREE);
14955 tree int64_ftype_v4sf
14956 = build_function_type_list (long_long_integer_type_node,
14957 V4SF_type_node, NULL_TREE);
14958 tree int_ftype_v8qi
14959 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14960 tree v4sf_ftype_v4sf_int
14961 = build_function_type_list (V4SF_type_node,
14962 V4SF_type_node, integer_type_node, NULL_TREE);
14963 tree v4sf_ftype_v4sf_int64
14964 = build_function_type_list (V4SF_type_node,
14965 V4SF_type_node, long_long_integer_type_node,
14967 tree v4sf_ftype_v4sf_v2si
14968 = build_function_type_list (V4SF_type_node,
14969 V4SF_type_node, V2SI_type_node, NULL_TREE);
14971 /* Miscellaneous. */
14972 tree v8qi_ftype_v4hi_v4hi
14973 = build_function_type_list (V8QI_type_node,
14974 V4HI_type_node, V4HI_type_node, NULL_TREE);
14975 tree v4hi_ftype_v2si_v2si
14976 = build_function_type_list (V4HI_type_node,
14977 V2SI_type_node, V2SI_type_node, NULL_TREE);
14978 tree v4sf_ftype_v4sf_v4sf_int
14979 = build_function_type_list (V4SF_type_node,
14980 V4SF_type_node, V4SF_type_node,
14981 integer_type_node, NULL_TREE);
14982 tree v2si_ftype_v4hi_v4hi
14983 = build_function_type_list (V2SI_type_node,
14984 V4HI_type_node, V4HI_type_node, NULL_TREE);
14985 tree v4hi_ftype_v4hi_int
14986 = build_function_type_list (V4HI_type_node,
14987 V4HI_type_node, integer_type_node, NULL_TREE);
14988 tree v4hi_ftype_v4hi_di
14989 = build_function_type_list (V4HI_type_node,
14990 V4HI_type_node, long_long_unsigned_type_node,
14992 tree v2si_ftype_v2si_di
14993 = build_function_type_list (V2SI_type_node,
14994 V2SI_type_node, long_long_unsigned_type_node,
14996 tree void_ftype_void
14997 = build_function_type (void_type_node, void_list_node);
14998 tree void_ftype_unsigned
14999 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15000 tree void_ftype_unsigned_unsigned
15001 = build_function_type_list (void_type_node, unsigned_type_node,
15002 unsigned_type_node, NULL_TREE);
15003 tree void_ftype_pcvoid_unsigned_unsigned
15004 = build_function_type_list (void_type_node, const_ptr_type_node,
15005 unsigned_type_node, unsigned_type_node,
15007 tree unsigned_ftype_void
15008 = build_function_type (unsigned_type_node, void_list_node);
15009 tree v2si_ftype_v4sf
15010 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15011 /* Loads/stores. */
15012 tree void_ftype_v8qi_v8qi_pchar
15013 = build_function_type_list (void_type_node,
15014 V8QI_type_node, V8QI_type_node,
15015 pchar_type_node, NULL_TREE);
15016 tree v4sf_ftype_pcfloat
15017 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15018 /* @@@ the type is bogus */
15019 tree v4sf_ftype_v4sf_pv2si
15020 = build_function_type_list (V4SF_type_node,
15021 V4SF_type_node, pv2si_type_node, NULL_TREE);
15022 tree void_ftype_pv2si_v4sf
15023 = build_function_type_list (void_type_node,
15024 pv2si_type_node, V4SF_type_node, NULL_TREE);
15025 tree void_ftype_pfloat_v4sf
15026 = build_function_type_list (void_type_node,
15027 pfloat_type_node, V4SF_type_node, NULL_TREE);
15028 tree void_ftype_pdi_di
15029 = build_function_type_list (void_type_node,
15030 pdi_type_node, long_long_unsigned_type_node,
15032 tree void_ftype_pv2di_v2di
15033 = build_function_type_list (void_type_node,
15034 pv2di_type_node, V2DI_type_node, NULL_TREE);
15035 /* Normal vector unops. */
15036 tree v4sf_ftype_v4sf
15037 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15039 /* Normal vector binops. */
15040 tree v4sf_ftype_v4sf_v4sf
15041 = build_function_type_list (V4SF_type_node,
15042 V4SF_type_node, V4SF_type_node, NULL_TREE);
15043 tree v8qi_ftype_v8qi_v8qi
15044 = build_function_type_list (V8QI_type_node,
15045 V8QI_type_node, V8QI_type_node, NULL_TREE);
15046 tree v4hi_ftype_v4hi_v4hi
15047 = build_function_type_list (V4HI_type_node,
15048 V4HI_type_node, V4HI_type_node, NULL_TREE);
15049 tree v2si_ftype_v2si_v2si
15050 = build_function_type_list (V2SI_type_node,
15051 V2SI_type_node, V2SI_type_node, NULL_TREE);
15052 tree di_ftype_di_di
15053 = build_function_type_list (long_long_unsigned_type_node,
15054 long_long_unsigned_type_node,
15055 long_long_unsigned_type_node, NULL_TREE);
15057 tree v2si_ftype_v2sf
15058 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15059 tree v2sf_ftype_v2si
15060 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15061 tree v2si_ftype_v2si
15062 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15063 tree v2sf_ftype_v2sf
15064 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15065 tree v2sf_ftype_v2sf_v2sf
15066 = build_function_type_list (V2SF_type_node,
15067 V2SF_type_node, V2SF_type_node, NULL_TREE);
15068 tree v2si_ftype_v2sf_v2sf
15069 = build_function_type_list (V2SI_type_node,
15070 V2SF_type_node, V2SF_type_node, NULL_TREE);
15071 tree pint_type_node = build_pointer_type (integer_type_node);
15072 tree pdouble_type_node = build_pointer_type (double_type_node);
15073 tree pcdouble_type_node = build_pointer_type (
15074 build_type_variant (double_type_node, 1, 0));
15075 tree int_ftype_v2df_v2df
15076 = build_function_type_list (integer_type_node,
15077 V2DF_type_node, V2DF_type_node, NULL_TREE);
15079 tree void_ftype_pcvoid
15080 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15081 tree v4sf_ftype_v4si
15082 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15083 tree v4si_ftype_v4sf
15084 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15085 tree v2df_ftype_v4si
15086 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15087 tree v4si_ftype_v2df
15088 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15089 tree v2si_ftype_v2df
15090 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15091 tree v4sf_ftype_v2df
15092 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15093 tree v2df_ftype_v2si
15094 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15095 tree v2df_ftype_v4sf
15096 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15097 tree int_ftype_v2df
15098 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15099 tree int64_ftype_v2df
15100 = build_function_type_list (long_long_integer_type_node,
15101 V2DF_type_node, NULL_TREE);
15102 tree v2df_ftype_v2df_int
15103 = build_function_type_list (V2DF_type_node,
15104 V2DF_type_node, integer_type_node, NULL_TREE);
15105 tree v2df_ftype_v2df_int64
15106 = build_function_type_list (V2DF_type_node,
15107 V2DF_type_node, long_long_integer_type_node,
15109 tree v4sf_ftype_v4sf_v2df
15110 = build_function_type_list (V4SF_type_node,
15111 V4SF_type_node, V2DF_type_node, NULL_TREE);
15112 tree v2df_ftype_v2df_v4sf
15113 = build_function_type_list (V2DF_type_node,
15114 V2DF_type_node, V4SF_type_node, NULL_TREE);
15115 tree v2df_ftype_v2df_v2df_int
15116 = build_function_type_list (V2DF_type_node,
15117 V2DF_type_node, V2DF_type_node,
15120 tree v2df_ftype_v2df_pcdouble
15121 = build_function_type_list (V2DF_type_node,
15122 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15123 tree void_ftype_pdouble_v2df
15124 = build_function_type_list (void_type_node,
15125 pdouble_type_node, V2DF_type_node, NULL_TREE);
15126 tree void_ftype_pint_int
15127 = build_function_type_list (void_type_node,
15128 pint_type_node, integer_type_node, NULL_TREE);
15129 tree void_ftype_v16qi_v16qi_pchar
15130 = build_function_type_list (void_type_node,
15131 V16QI_type_node, V16QI_type_node,
15132 pchar_type_node, NULL_TREE);
15133 tree v2df_ftype_pcdouble
15134 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15135 tree v2df_ftype_v2df_v2df
15136 = build_function_type_list (V2DF_type_node,
15137 V2DF_type_node, V2DF_type_node, NULL_TREE);
15138 tree v16qi_ftype_v16qi_v16qi
15139 = build_function_type_list (V16QI_type_node,
15140 V16QI_type_node, V16QI_type_node, NULL_TREE);
15141 tree v8hi_ftype_v8hi_v8hi
15142 = build_function_type_list (V8HI_type_node,
15143 V8HI_type_node, V8HI_type_node, NULL_TREE);
15144 tree v4si_ftype_v4si_v4si
15145 = build_function_type_list (V4SI_type_node,
15146 V4SI_type_node, V4SI_type_node, NULL_TREE);
15147 tree v2di_ftype_v2di_v2di
15148 = build_function_type_list (V2DI_type_node,
15149 V2DI_type_node, V2DI_type_node, NULL_TREE);
15150 tree v2di_ftype_v2df_v2df
15151 = build_function_type_list (V2DI_type_node,
15152 V2DF_type_node, V2DF_type_node, NULL_TREE);
15153 tree v2df_ftype_v2df
15154 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15155 tree v2di_ftype_v2di_int
15156 = build_function_type_list (V2DI_type_node,
15157 V2DI_type_node, integer_type_node, NULL_TREE);
15158 tree v4si_ftype_v4si_int
15159 = build_function_type_list (V4SI_type_node,
15160 V4SI_type_node, integer_type_node, NULL_TREE);
15161 tree v8hi_ftype_v8hi_int
15162 = build_function_type_list (V8HI_type_node,
15163 V8HI_type_node, integer_type_node, NULL_TREE);
15164 tree v8hi_ftype_v8hi_v2di
15165 = build_function_type_list (V8HI_type_node,
15166 V8HI_type_node, V2DI_type_node, NULL_TREE);
15167 tree v4si_ftype_v4si_v2di
15168 = build_function_type_list (V4SI_type_node,
15169 V4SI_type_node, V2DI_type_node, NULL_TREE);
15170 tree v4si_ftype_v8hi_v8hi
15171 = build_function_type_list (V4SI_type_node,
15172 V8HI_type_node, V8HI_type_node, NULL_TREE);
15173 tree di_ftype_v8qi_v8qi
15174 = build_function_type_list (long_long_unsigned_type_node,
15175 V8QI_type_node, V8QI_type_node, NULL_TREE);
15176 tree di_ftype_v2si_v2si
15177 = build_function_type_list (long_long_unsigned_type_node,
15178 V2SI_type_node, V2SI_type_node, NULL_TREE);
15179 tree v2di_ftype_v16qi_v16qi
15180 = build_function_type_list (V2DI_type_node,
15181 V16QI_type_node, V16QI_type_node, NULL_TREE);
15182 tree v2di_ftype_v4si_v4si
15183 = build_function_type_list (V2DI_type_node,
15184 V4SI_type_node, V4SI_type_node, NULL_TREE);
15185 tree int_ftype_v16qi
15186 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15187 tree v16qi_ftype_pcchar
15188 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15189 tree void_ftype_pchar_v16qi
15190 = build_function_type_list (void_type_node,
15191 pchar_type_node, V16QI_type_node, NULL_TREE);
15194 tree float128_type;
15197 /* The __float80 type. */
15198 if (TYPE_MODE (long_double_type_node) == XFmode)
15199 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15203 /* The __float80 type. */
15204 float80_type = make_node (REAL_TYPE);
15205 TYPE_PRECISION (float80_type) = 80;
15206 layout_type (float80_type);
15207 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15212 float128_type = make_node (REAL_TYPE);
15213 TYPE_PRECISION (float128_type) = 128;
15214 layout_type (float128_type);
15215 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15218 /* Add all builtins that are more or less simple operations on two
15220 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15222 /* Use one of the operands; the target can have a different mode for
15223 mask-generating compares. */
15224 enum machine_mode mode;
15229 mode = insn_data[d->icode].operand[1].mode;
15234 type = v16qi_ftype_v16qi_v16qi;
15237 type = v8hi_ftype_v8hi_v8hi;
15240 type = v4si_ftype_v4si_v4si;
15243 type = v2di_ftype_v2di_v2di;
15246 type = v2df_ftype_v2df_v2df;
15249 type = v4sf_ftype_v4sf_v4sf;
15252 type = v8qi_ftype_v8qi_v8qi;
15255 type = v4hi_ftype_v4hi_v4hi;
15258 type = v2si_ftype_v2si_v2si;
15261 type = di_ftype_di_di;
15265 gcc_unreachable ();
15268 /* Override for comparisons. */
15269 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15270 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15271 type = v4si_ftype_v4sf_v4sf;
15273 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15274 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15275 type = v2di_ftype_v2df_v2df;
15277 def_builtin (d->mask, d->name, type, d->code);
15280 /* Add the remaining MMX insns with somewhat more complicated types. */
15281 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15282 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15283 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15284 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15286 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15287 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15288 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15290 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15291 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15293 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15294 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15296 /* comi/ucomi insns. */
15297 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15298 if (d->mask == MASK_SSE2)
15299 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15301 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15303 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15304 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15305 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15307 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15308 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15309 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15310 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15311 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15312 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15313 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15314 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15315 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15316 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15317 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15319 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15321 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15322 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15324 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15325 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15326 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15327 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15329 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15330 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15331 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15332 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15334 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15336 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15338 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15339 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15340 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15341 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15342 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15343 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15345 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15347 /* Original 3DNow! */
15348 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15349 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15350 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15351 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15352 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15353 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15354 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15355 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15356 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15357 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15358 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15359 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15360 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15361 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15362 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15363 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15364 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15365 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15366 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15367 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15369 /* 3DNow! extension as used in the Athlon CPU. */
15370 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15371 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15372 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15373 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15374 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15375 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15378 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15380 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15381 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15383 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15384 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15386 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15387 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15388 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15389 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15390 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15392 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15393 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15394 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15395 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15397 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15398 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15400 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15402 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15403 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15405 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15406 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15407 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15408 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15409 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15411 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15413 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15414 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15415 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15416 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15418 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15419 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15420 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15422 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15423 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15424 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15425 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15427 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15428 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15429 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15431 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15432 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15434 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15435 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15437 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15438 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15439 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15441 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15442 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15443 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15445 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15446 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15448 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15449 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15450 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15451 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15453 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15454 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15455 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15456 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15458 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15459 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15461 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15463 /* Prescott New Instructions. */
15464 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15465 void_ftype_pcvoid_unsigned_unsigned,
15466 IX86_BUILTIN_MONITOR);
15467 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15468 void_ftype_unsigned_unsigned,
15469 IX86_BUILTIN_MWAIT);
15470 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15472 IX86_BUILTIN_MOVSHDUP);
15473 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15475 IX86_BUILTIN_MOVSLDUP);
15476 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15477 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15479 /* Access to the vec_init patterns. */
15480 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15481 integer_type_node, NULL_TREE);
15482 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15483 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15485 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15486 short_integer_type_node,
15487 short_integer_type_node,
15488 short_integer_type_node, NULL_TREE);
15489 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15490 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15492 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15493 char_type_node, char_type_node,
15494 char_type_node, char_type_node,
15495 char_type_node, char_type_node,
15496 char_type_node, NULL_TREE);
15497 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15498 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15500 /* Access to the vec_extract patterns. */
15501 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15502 integer_type_node, NULL_TREE);
15503 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15504 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15506 ftype = build_function_type_list (long_long_integer_type_node,
15507 V2DI_type_node, integer_type_node,
15509 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15510 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15512 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15513 integer_type_node, NULL_TREE);
15514 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15515 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15517 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15518 integer_type_node, NULL_TREE);
15519 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15520 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15522 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15523 integer_type_node, NULL_TREE);
15524 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15525 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15527 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15528 integer_type_node, NULL_TREE);
15529 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15530 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15532 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15533 integer_type_node, NULL_TREE);
15534 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15535 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15537 /* Access to the vec_set patterns. */
15538 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15540 integer_type_node, NULL_TREE);
15541 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15542 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15544 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15546 integer_type_node, NULL_TREE);
15547 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15548 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15551 /* Errors in the source file can cause expand_expr to return const0_rtx
15552 where we expect a vector. To avoid crashing, use one of the vector
15553 clear instructions. */
/* Return X itself unless X is const0_rtx; in that case substitute the
   all-zero vector constant of MODE so callers always see a vector rtx.  */
15555 safe_vector_operand (rtx x, enum machine_mode mode)
15557 if (x == const0_rtx)
15558 x = CONST0_RTX (mode);
15562 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-argument builtin: apply insn pattern ICODE to the two
   arguments in ARGLIST, placing the result in TARGET (a fresh register
   is created when TARGET is missing or unsuitable).  Returns the rtx
   holding the result.  */
15565 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15568 tree arg0 = TREE_VALUE (arglist);
15569 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15570 rtx op0 = expand_normal (arg0);
15571 rtx op1 = expand_normal (arg1);
15572 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15573 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15574 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against expand_expr having returned const0_rtx for an
   erroneous source-level expression (see safe_vector_operand).  */
15576 if (VECTOR_MODE_P (mode0))
15577 op0 = safe_vector_operand (op0, mode0);
15578 if (VECTOR_MODE_P (mode1))
15579 op1 = safe_vector_operand (op1, mode1);
15581 if (optimize || !target
15582 || GET_MODE (target) != tmode
15583 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15584 target = gen_reg_rtx (tmode);
/* An SImode value feeding a TImode insn operand (e.g. a shift count):
   load it into the low element of a V4SI register, then reinterpret
   the whole register as TImode.  */
15586 if (GET_MODE (op1) == SImode && mode1 == TImode)
15588 rtx x = gen_reg_rtx (V4SImode);
15589 emit_insn (gen_sse2_loadd (x, op1));
15590 op1 = gen_lowpart (TImode, x);
15593 /* The insn must want input operands in the same modes as the
15595 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15596 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15598 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15599 op0 = copy_to_mode_reg (mode0, op0);
15600 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15601 op1 = copy_to_mode_reg (mode1, op1);
15603 /* ??? Using ix86_fixup_binary_operands is problematic when
15604 we've got mismatched modes. Fake it. */
/* NOTE(review): xops is presumably filled from target/op0/op1 in
   elided lines just above -- confirm against the full source.  */
15610 if (tmode == mode0 && tmode == mode1)
15612 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15616 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
/* Mismatched modes: force everything into registers, which any
   binary SSE/MMX pattern accepts.  */
15618 op0 = force_reg (mode0, op0);
15619 op1 = force_reg (mode1, op1);
15620 target = gen_reg_rtx (tmode);
15623 pat = GEN_FCN (icode) (target, op0, op1);
15630 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store builtin: the first argument in ARGLIST is the
   destination address, the second the value to store.  Emits the
   store insn generated by ICODE.  */
15633 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15636 tree arg0 = TREE_VALUE (arglist);
15637 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15638 rtx op0 = expand_normal (arg0);
15639 rtx op1 = expand_normal (arg1);
15640 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15641 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15643 if (VECTOR_MODE_P (mode1))
15644 op1 = safe_vector_operand (op1, mode1);
/* op0 is a pointer value: force the address into a register and wrap
   it in a MEM of the insn's destination mode.  */
15646 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15647 op1 = copy_to_mode_reg (mode1, op1);
15649 pat = GEN_FCN (icode) (op0, op1);
15655 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-argument builtin via pattern ICODE into TARGET.  When
   DO_LOAD is nonzero the argument is a pointer and the operand is
   loaded through a MEM first; otherwise the value is used directly.  */
15658 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15659 rtx target, int do_load)
15662 tree arg0 = TREE_VALUE (arglist);
15663 rtx op0 = expand_normal (arg0);
15664 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15665 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15667 if (optimize || !target
15668 || GET_MODE (target) != tmode
15669 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15670 target = gen_reg_rtx (tmode);
/* NOTE(review): this MEM construction presumably sits under an
   "if (do_load)" whose lines are elided here -- confirm against the
   full source.  */
15672 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15675 if (VECTOR_MODE_P (mode0))
15676 op0 = safe_vector_operand (op0, mode0);
15678 if ((optimize && !register_operand (op0, mode0))
15679 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15680 op0 = copy_to_mode_reg (mode0, op0);
15683 pat = GEN_FCN (icode) (target, op0);
15690 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15691 sqrtss, rsqrtss, rcpss. */
/* These scalar insns operate on the low element only, so the pattern
   takes two vector inputs: the merge source and the operand proper.  */
15694 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15697 tree arg0 = TREE_VALUE (arglist);
15698 rtx op1, op0 = expand_normal (arg0);
15699 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15700 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15702 if (optimize || !target
15703 || GET_MODE (target) != tmode
15704 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15705 target = gen_reg_rtx (tmode);
15707 if (VECTOR_MODE_P (mode0))
15708 op0 = safe_vector_operand (op0, mode0);
15710 if ((optimize && !register_operand (op0, mode0))
15711 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15712 op0 = copy_to_mode_reg (mode0, op0);
/* NOTE(review): op1 is presumably initialized from op0 ("op1 = op0;")
   in an elided line just above -- confirm against the full source;
   otherwise op1 would be read uninitialized here.  */
15715 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15716 op1 = copy_to_mode_reg (mode0, op1);
15718 pat = GEN_FCN (icode) (target, op0, op1);
15725 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand a vector comparison builtin described by D.  The rtx
   comparison code comes from d->comparison; operands may be swapped
   when the hardware provides only the mirrored predicate.  */
15728 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15732 tree arg0 = TREE_VALUE (arglist);
15733 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15734 rtx op0 = expand_normal (arg0);
15735 rtx op1 = expand_normal (arg1);
15737 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15738 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15739 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15740 enum rtx_code comparison = d->comparison;
15742 if (VECTOR_MODE_P (mode0))
15743 op0 = safe_vector_operand (op0, mode0);
15744 if (VECTOR_MODE_P (mode1))
15745 op1 = safe_vector_operand (op1, mode1);
15747 /* Swap operands if we have a comparison that isn't available in
15749 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Copy op1 through a fresh register before the swap so the original
   value is preserved (the swap itself is in elided lines).  */
15751 rtx tmp = gen_reg_rtx (mode1);
15752 emit_move_insn (tmp, op1);
15757 if (optimize || !target
15758 || GET_MODE (target) != tmode
15759 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15760 target = gen_reg_rtx (tmode);
15762 if ((optimize && !register_operand (op0, mode0))
15763 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15764 op0 = copy_to_mode_reg (mode0, op0);
15765 if ((optimize && !register_operand (op1, mode1))
15766 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15767 op1 = copy_to_mode_reg (mode1, op1);
/* Operand 3 of the pattern is the comparison rtx itself.  */
15769 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15770 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15777 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a scalar flag-setting comparison builtin (comi/ucomi family)
   described by D: emit the compare, then materialize d->comparison as
   a 0/1 value by setcc-ing the low byte of a zeroed SImode register.  */
15780 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15784 tree arg0 = TREE_VALUE (arglist);
15785 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15786 rtx op0 = expand_normal (arg0);
15787 rtx op1 = expand_normal (arg1);
15789 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15790 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15791 enum rtx_code comparison = d->comparison;
15793 if (VECTOR_MODE_P (mode0))
15794 op0 = safe_vector_operand (op0, mode0);
15795 if (VECTOR_MODE_P (mode1))
15796 op1 = safe_vector_operand (op1, mode1);
15798 /* Swap operands if we have a comparison that isn't available in
15800 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the full SImode result first; only the low QImode byte is
   written by the setcc below.  */
15807 target = gen_reg_rtx (SImode);
15808 emit_move_insn (target, const0_rtx);
15809 target = gen_rtx_SUBREG (QImode, target, 0);
15811 if ((optimize && !register_operand (op0, mode0))
15812 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15813 op0 = copy_to_mode_reg (mode0, op0);
15814 if ((optimize && !register_operand (op1, mode1))
15815 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15816 op1 = copy_to_mode_reg (mode1, op1);
15818 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15819 pat = GEN_FCN (d->icode) (op0, op1);
/* STRICT_LOW_PART writes only the low byte, leaving the zeroed upper
   bits of the SImode register intact; return that full register.  */
15823 emit_insn (gen_rtx_SET (VOIDmode,
15824 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15825 gen_rtx_fmt_ee (comparison, QImode,
15829 return SUBREG_REG (target);
15832 /* Return the integer constant in ARG. Constrain it to be in the range
15833 of the subparts of VEC_TYPE; issue an error if not. */
15836 get_element_number (tree vec_type, tree arg)
15838 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject anything that is not a host-representable unsigned constant
   in [0, max]; diagnose with error() rather than crashing later.  */
15840 if (!host_integerp (arg, 1)
15841 || (elt = tree_low_cst (arg, 1), elt > max))
15843 error ("selector must be an integer constant in the range 0..%wi", max);
15850 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15851 ix86_expand_vector_init. We DO have language-level syntax for this, in
15852 the form of (type){ init-list }. Except that since we can't place emms
15853 instructions from inside the compiler, we can't allow the use of MMX
15854 registers unless the user explicitly asks for it. So we do *not* define
15855 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15856 we have builtins invoked by mmintrin.h that gives us license to emit
15857 these sorts of instructions. */
/* Build a vector of TYPE from the scalar arguments in ARGLIST and
   leave it in TARGET (fresh register if TARGET is unusable).  */
15860 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15862 enum machine_mode tmode = TYPE_MODE (type);
15863 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15864 int i, n_elt = GET_MODE_NUNITS (tmode);
15865 rtvec v = rtvec_alloc (n_elt);
15867 gcc_assert (VECTOR_MODE_P (tmode));
/* Expand each initializer and narrow it to the element mode.  */
15869 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15871 rtx x = expand_normal (TREE_VALUE (arglist));
15872 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* The builtin's prototype guarantees exactly n_elt arguments.  */
15875 gcc_assert (arglist == NULL);
15877 if (!target || !register_operand (target, tmode))
15878 target = gen_reg_rtx (tmode);
15880 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15884 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15885 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15886 had a language-level syntax for referencing vector elements. */
/* Extract element arg1 (a constant selector) from the vector arg0,
   returning the scalar in TARGET.  */
15889 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15891 enum machine_mode tmode, mode0;
15896 arg0 = TREE_VALUE (arglist);
15897 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15899 op0 = expand_normal (arg0);
/* Out-of-range or non-constant selectors are diagnosed inside
   get_element_number.  */
15900 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode is the element mode, mode0 the containing vector mode.  */
15902 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15903 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15904 gcc_assert (VECTOR_MODE_P (mode0));
15906 op0 = force_reg (mode0, op0);
15908 if (optimize || !target || !register_operand (target, tmode))
15909 target = gen_reg_rtx (tmode);
15911 ix86_expand_vector_extract (true, target, op0, elt);
15916 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15917 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15918 a language-level syntax for referencing vector elements. */
/* Set element arg2 (a constant selector) of vector arg0 to the scalar
   value arg1, updating the vector in place via ix86_expand_vector_set.  */
15921 ix86_expand_vec_set_builtin (tree arglist)
15923 enum machine_mode tmode, mode1;
15924 tree arg0, arg1, arg2;
15928 arg0 = TREE_VALUE (arglist);
15929 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15930 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
/* tmode is the vector mode, mode1 its element mode.  */
15932 tmode = TYPE_MODE (TREE_TYPE (arg0));
15933 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15934 gcc_assert (VECTOR_MODE_P (tmode));
15936 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15937 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15938 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the scalar to the element mode if expansion produced it in
   a different (non-VOID) mode.  */
15940 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15941 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15943 op0 = force_reg (tmode, op0);
15944 op1 = force_reg (mode1, op1);
15946 ix86_expand_vector_set (true, op0, op1, elt);
15951 /* Expand an expression EXP that calls a built-in function,
15952 with result going to TARGET if that's convenient
15953 (and in mode MODE if that's convenient).
15954 SUBTARGET may be used as the target for computing one of EXP's operands.
15955 IGNORE is nonzero if the value is to be ignored. */
15958 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15959 enum machine_mode mode ATTRIBUTE_UNUSED,
15960 int ignore ATTRIBUTE_UNUSED)
15962 const struct builtin_description *d;
15964 enum insn_code icode;
15965 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15966 tree arglist = TREE_OPERAND (exp, 1);
15967 tree arg0, arg1, arg2;
15968 rtx op0, op1, op2, pat;
15969 enum machine_mode tmode, mode0, mode1, mode2;
15970 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15974 case IX86_BUILTIN_EMMS:
15975 emit_insn (gen_mmx_emms ());
15978 case IX86_BUILTIN_SFENCE:
15979 emit_insn (gen_sse_sfence ());
15982 case IX86_BUILTIN_MASKMOVQ:
15983 case IX86_BUILTIN_MASKMOVDQU:
15984 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15985 ? CODE_FOR_mmx_maskmovq
15986 : CODE_FOR_sse2_maskmovdqu);
15987 /* Note the arg order is different from the operand order. */
15988 arg1 = TREE_VALUE (arglist);
15989 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15990 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15991 op0 = expand_normal (arg0);
15992 op1 = expand_normal (arg1);
15993 op2 = expand_normal (arg2);
15994 mode0 = insn_data[icode].operand[0].mode;
15995 mode1 = insn_data[icode].operand[1].mode;
15996 mode2 = insn_data[icode].operand[2].mode;
15998 op0 = force_reg (Pmode, op0);
15999 op0 = gen_rtx_MEM (mode1, op0);
16001 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16002 op0 = copy_to_mode_reg (mode0, op0);
16003 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16004 op1 = copy_to_mode_reg (mode1, op1);
16005 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16006 op2 = copy_to_mode_reg (mode2, op2);
16007 pat = GEN_FCN (icode) (op0, op1, op2);
16013 case IX86_BUILTIN_SQRTSS:
16014 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16015 case IX86_BUILTIN_RSQRTSS:
16016 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16017 case IX86_BUILTIN_RCPSS:
16018 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16020 case IX86_BUILTIN_LOADUPS:
16021 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16023 case IX86_BUILTIN_STOREUPS:
16024 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16026 case IX86_BUILTIN_LOADHPS:
16027 case IX86_BUILTIN_LOADLPS:
16028 case IX86_BUILTIN_LOADHPD:
16029 case IX86_BUILTIN_LOADLPD:
16030 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16031 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16032 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16033 : CODE_FOR_sse2_loadlpd);
16034 arg0 = TREE_VALUE (arglist);
16035 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16036 op0 = expand_normal (arg0);
16037 op1 = expand_normal (arg1);
16038 tmode = insn_data[icode].operand[0].mode;
16039 mode0 = insn_data[icode].operand[1].mode;
16040 mode1 = insn_data[icode].operand[2].mode;
16042 op0 = force_reg (mode0, op0);
16043 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16044 if (optimize || target == 0
16045 || GET_MODE (target) != tmode
16046 || !register_operand (target, tmode))
16047 target = gen_reg_rtx (tmode);
16048 pat = GEN_FCN (icode) (target, op0, op1);
16054 case IX86_BUILTIN_STOREHPS:
16055 case IX86_BUILTIN_STORELPS:
16056 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16057 : CODE_FOR_sse_storelps);
16058 arg0 = TREE_VALUE (arglist);
16059 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16060 op0 = expand_normal (arg0);
16061 op1 = expand_normal (arg1);
16062 mode0 = insn_data[icode].operand[0].mode;
16063 mode1 = insn_data[icode].operand[1].mode;
16065 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16066 op1 = force_reg (mode1, op1);
16068 pat = GEN_FCN (icode) (op0, op1);
16074 case IX86_BUILTIN_MOVNTPS:
16075 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16076 case IX86_BUILTIN_MOVNTQ:
16077 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16079 case IX86_BUILTIN_LDMXCSR:
16080 op0 = expand_normal (TREE_VALUE (arglist));
16081 target = assign_386_stack_local (SImode, SLOT_TEMP);
16082 emit_move_insn (target, op0);
16083 emit_insn (gen_sse_ldmxcsr (target));
16086 case IX86_BUILTIN_STMXCSR:
16087 target = assign_386_stack_local (SImode, SLOT_TEMP);
16088 emit_insn (gen_sse_stmxcsr (target));
16089 return copy_to_mode_reg (SImode, target);
16091 case IX86_BUILTIN_SHUFPS:
16092 case IX86_BUILTIN_SHUFPD:
16093 icode = (fcode == IX86_BUILTIN_SHUFPS
16094 ? CODE_FOR_sse_shufps
16095 : CODE_FOR_sse2_shufpd);
16096 arg0 = TREE_VALUE (arglist);
16097 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16098 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16099 op0 = expand_normal (arg0);
16100 op1 = expand_normal (arg1);
16101 op2 = expand_normal (arg2);
16102 tmode = insn_data[icode].operand[0].mode;
16103 mode0 = insn_data[icode].operand[1].mode;
16104 mode1 = insn_data[icode].operand[2].mode;
16105 mode2 = insn_data[icode].operand[3].mode;
16107 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16108 op0 = copy_to_mode_reg (mode0, op0);
16109 if ((optimize && !register_operand (op1, mode1))
16110 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16111 op1 = copy_to_mode_reg (mode1, op1);
16112 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16114 /* @@@ better error message */
16115 error ("mask must be an immediate");
16116 return gen_reg_rtx (tmode);
16118 if (optimize || target == 0
16119 || GET_MODE (target) != tmode
16120 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16121 target = gen_reg_rtx (tmode);
16122 pat = GEN_FCN (icode) (target, op0, op1, op2);
16128 case IX86_BUILTIN_PSHUFW:
16129 case IX86_BUILTIN_PSHUFD:
16130 case IX86_BUILTIN_PSHUFHW:
16131 case IX86_BUILTIN_PSHUFLW:
16132 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16133 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16134 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16135 : CODE_FOR_mmx_pshufw);
16136 arg0 = TREE_VALUE (arglist);
16137 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16138 op0 = expand_normal (arg0);
16139 op1 = expand_normal (arg1);
16140 tmode = insn_data[icode].operand[0].mode;
16141 mode1 = insn_data[icode].operand[1].mode;
16142 mode2 = insn_data[icode].operand[2].mode;
16144 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16145 op0 = copy_to_mode_reg (mode1, op0);
16146 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16148 /* @@@ better error message */
16149 error ("mask must be an immediate");
16153 || GET_MODE (target) != tmode
16154 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16155 target = gen_reg_rtx (tmode);
16156 pat = GEN_FCN (icode) (target, op0, op1);
16162 case IX86_BUILTIN_PSLLDQI128:
16163 case IX86_BUILTIN_PSRLDQI128:
16164 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16165 : CODE_FOR_sse2_lshrti3);
16166 arg0 = TREE_VALUE (arglist);
16167 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16168 op0 = expand_normal (arg0);
16169 op1 = expand_normal (arg1);
16170 tmode = insn_data[icode].operand[0].mode;
16171 mode1 = insn_data[icode].operand[1].mode;
16172 mode2 = insn_data[icode].operand[2].mode;
16174 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16176 op0 = copy_to_reg (op0);
16177 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16179 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16181 error ("shift must be an immediate");
16184 target = gen_reg_rtx (V2DImode);
16185 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16191 case IX86_BUILTIN_FEMMS:
16192 emit_insn (gen_mmx_femms ());
16195 case IX86_BUILTIN_PAVGUSB:
16196 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16198 case IX86_BUILTIN_PF2ID:
16199 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16201 case IX86_BUILTIN_PFACC:
16202 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16204 case IX86_BUILTIN_PFADD:
16205 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16207 case IX86_BUILTIN_PFCMPEQ:
16208 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16210 case IX86_BUILTIN_PFCMPGE:
16211 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16213 case IX86_BUILTIN_PFCMPGT:
16214 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16216 case IX86_BUILTIN_PFMAX:
16217 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16219 case IX86_BUILTIN_PFMIN:
16220 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16222 case IX86_BUILTIN_PFMUL:
16223 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16225 case IX86_BUILTIN_PFRCP:
16226 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16228 case IX86_BUILTIN_PFRCPIT1:
16229 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16231 case IX86_BUILTIN_PFRCPIT2:
16232 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16234 case IX86_BUILTIN_PFRSQIT1:
16235 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16237 case IX86_BUILTIN_PFRSQRT:
16238 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16240 case IX86_BUILTIN_PFSUB:
16241 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16243 case IX86_BUILTIN_PFSUBR:
16244 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16246 case IX86_BUILTIN_PI2FD:
16247 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16249 case IX86_BUILTIN_PMULHRW:
16250 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16252 case IX86_BUILTIN_PF2IW:
16253 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16255 case IX86_BUILTIN_PFNACC:
16256 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16258 case IX86_BUILTIN_PFPNACC:
16259 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16261 case IX86_BUILTIN_PI2FW:
16262 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16264 case IX86_BUILTIN_PSWAPDSI:
16265 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16267 case IX86_BUILTIN_PSWAPDSF:
16268 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16270 case IX86_BUILTIN_SQRTSD:
16271 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16272 case IX86_BUILTIN_LOADUPD:
16273 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16274 case IX86_BUILTIN_STOREUPD:
16275 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16277 case IX86_BUILTIN_MFENCE:
16278 emit_insn (gen_sse2_mfence ());
16280 case IX86_BUILTIN_LFENCE:
16281 emit_insn (gen_sse2_lfence ());
16284 case IX86_BUILTIN_CLFLUSH:
16285 arg0 = TREE_VALUE (arglist);
16286 op0 = expand_normal (arg0);
16287 icode = CODE_FOR_sse2_clflush;
16288 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16289 op0 = copy_to_mode_reg (Pmode, op0);
16291 emit_insn (gen_sse2_clflush (op0));
16294 case IX86_BUILTIN_MOVNTPD:
16295 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16296 case IX86_BUILTIN_MOVNTDQ:
16297 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16298 case IX86_BUILTIN_MOVNTI:
16299 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16301 case IX86_BUILTIN_LOADDQU:
16302 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16303 case IX86_BUILTIN_STOREDQU:
16304 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16306 case IX86_BUILTIN_MONITOR:
16307 arg0 = TREE_VALUE (arglist);
16308 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16309 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16310 op0 = expand_normal (arg0);
16311 op1 = expand_normal (arg1);
16312 op2 = expand_normal (arg2);
16314 op0 = copy_to_mode_reg (Pmode, op0);
16316 op1 = copy_to_mode_reg (SImode, op1);
16318 op2 = copy_to_mode_reg (SImode, op2);
16320 emit_insn (gen_sse3_monitor (op0, op1, op2));
16322 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16325 case IX86_BUILTIN_MWAIT:
16326 arg0 = TREE_VALUE (arglist);
16327 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16328 op0 = expand_normal (arg0);
16329 op1 = expand_normal (arg1);
16331 op0 = copy_to_mode_reg (SImode, op0);
16333 op1 = copy_to_mode_reg (SImode, op1);
16334 emit_insn (gen_sse3_mwait (op0, op1));
16337 case IX86_BUILTIN_LDDQU:
16338 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16341 case IX86_BUILTIN_VEC_INIT_V2SI:
16342 case IX86_BUILTIN_VEC_INIT_V4HI:
16343 case IX86_BUILTIN_VEC_INIT_V8QI:
16344 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16346 case IX86_BUILTIN_VEC_EXT_V2DF:
16347 case IX86_BUILTIN_VEC_EXT_V2DI:
16348 case IX86_BUILTIN_VEC_EXT_V4SF:
16349 case IX86_BUILTIN_VEC_EXT_V4SI:
16350 case IX86_BUILTIN_VEC_EXT_V8HI:
16351 case IX86_BUILTIN_VEC_EXT_V2SI:
16352 case IX86_BUILTIN_VEC_EXT_V4HI:
16353 return ix86_expand_vec_ext_builtin (arglist, target);
16355 case IX86_BUILTIN_VEC_SET_V8HI:
16356 case IX86_BUILTIN_VEC_SET_V4HI:
16357 return ix86_expand_vec_set_builtin (arglist);
16363 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16364 if (d->code == fcode)
16366 /* Compares are treated specially. */
16367 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16368 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16369 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16370 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16371 return ix86_expand_sse_compare (d, arglist, target);
16373 return ix86_expand_binop_builtin (d->icode, arglist, target);
16376 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16377 if (d->code == fcode)
16378 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16380 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16381 if (d->code == fcode)
16382 return ix86_expand_sse_comi (d, arglist, target);
16384 gcc_unreachable ();
16387 /* Store OPERAND to the memory after reload is completed. This means
16388 that we can't easily use assign_stack_local. */
/* Returns a MEM rtx through which OPERAND can be reloaded.  Three
   strategies are visible below: a store into the red zone just below
   the stack pointer (x86-64 ABI), a 64-bit push via PRE_DEC of DImode,
   and 32-bit push sequences (DImode split into two SImode halves via
   split_di; HImode widened to SImode).  NOTE(review): this listing
   elides several lines of the original function, so the control flow
   between the visible statements is incomplete here.  */
16390 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16394 gcc_assert (reload_completed);
/* Red zone available: address memory below the stack pointer directly,
   no stack-pointer adjustment needed.  */
16395 if (TARGET_RED_ZONE)
16397 result = gen_rtx_MEM (mode,
16398 gen_rtx_PLUS (Pmode,
16400 GEN_INT (-RED_ZONE_SIZE)));
16401 emit_move_insn (result, operand);
16403 else if (!TARGET_RED_ZONE && TARGET_64BIT)
/* 64-bit without a red zone: push the value as a full DImode word.  */
16409 operand = gen_lowpart (DImode, operand);
16413 gen_rtx_SET (VOIDmode,
16414 gen_rtx_MEM (DImode,
16415 gen_rtx_PRE_DEC (DImode,
16416 stack_pointer_rtx)),
16420 gcc_unreachable ();
16422 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: DImode operands are pushed as two SImode halves.  */
16431 split_di (&operand, 1, operands, operands + 1);
16433 gen_rtx_SET (VOIDmode,
16434 gen_rtx_MEM (SImode,
16435 gen_rtx_PRE_DEC (Pmode,
16436 stack_pointer_rtx)),
16439 gen_rtx_SET (VOIDmode,
16440 gen_rtx_MEM (SImode,
16441 gen_rtx_PRE_DEC (Pmode,
16442 stack_pointer_rtx)),
16447 /* Store HImodes as SImodes. */
16448 operand = gen_lowpart (SImode, operand);
16452 gen_rtx_SET (VOIDmode,
16453 gen_rtx_MEM (GET_MODE (operand),
16454 gen_rtx_PRE_DEC (SImode,
16455 stack_pointer_rtx)),
16459 gcc_unreachable ();
16461 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16466 /* Free operand from the memory. */
/* Counterpart of ix86_force_to_memory: deallocate the stack space the
   store used.  With a red zone nothing was pushed, so nothing needs
   freeing.  NOTE(review): interior lines are elided in this listing.  */
16468 ix86_free_from_memory (enum machine_mode mode)
16470 if (!TARGET_RED_ZONE)
/* DImode (or any mode on 64-bit) occupied a full word; elided code
   presumably computes the pop size — TODO confirm against full source.  */
16474 if (mode == DImode || TARGET_64BIT)
16478 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16479 to pop or add instruction if registers are available. */
16480 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16481 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16486 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16487 QImode must go into class Q_REGS.
16488 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16489 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS for x86: given value X and the
   allocator's candidate CLASS, return the subclass we actually want
   (or NO_REGS to force X into memory).  NOTE(review): several return
   statements are elided in this listing.  */
16491 ix86_preferred_reload_class (rtx x, enum reg_class class)
16493 enum machine_mode mode = GET_MODE (x);
16495 /* We're only allowed to return a subclass of CLASS. Many of the
16496 following checks fail for NO_REGS, so eliminate that early. */
16497 if (class == NO_REGS)
16500 /* All classes can load zeros. */
16501 if (x == CONST0_RTX (mode))
16504 /* Force constants into memory if we are loading a (nonzero) constant into
16505 an MMX or SSE register. This is because there are no MMX/SSE instructions
16506 to load from a constant. */
16508 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16511 /* Prefer SSE regs only, if we can use them for math. */
16512 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16513 return SSE_CLASS_P (class) ? class : NO_REGS;
16515 /* Floating-point constants need more complex checks. */
16516 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16518 /* General regs can load everything. */
16519 if (reg_class_subset_p (class, GENERAL_REGS))
16522 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16523 zero above. We only want to wind up preferring 80387 registers if
16524 we plan on doing computation with them. */
16526 && standard_80387_constant_p (x))
16528 /* Limit class to non-sse. */
16529 if (class == FLOAT_SSE_REGS)
16531 if (class == FP_TOP_SSE_REGS)
16533 if (class == FP_SECOND_SSE_REGS)
16534 return FP_SECOND_REG;
16535 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16542 /* Generally when we see PLUS here, it's the function invariant
16543 (plus soft-fp const_int). Which can only be computed into general
16545 if (GET_CODE (x) == PLUS)
16546 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16548 /* QImode constants are easy to load, but non-constant QImode data
16549 must go into Q_REGS. */
16550 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16552 if (reg_class_subset_p (class, Q_REGS))
16554 if (reg_class_subset_p (Q_REGS, class))
16562 /* Discourage putting floating-point values in SSE registers unless
16563 SSE math is being used, and likewise for the 387 registers. */
/* Implements PREFERRED_OUTPUT_RELOAD_CLASS: restrict the output reload
   class to the register bank we do FP math in; returning NO_REGS asks
   reload to reject this alternative if it can.  */
16565 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16567 enum machine_mode mode = GET_MODE (x);
16569 /* Restrict the output reload class to the register bank that we are doing
16570 math on. If we would like not to return a subset of CLASS, reject this
16571 alternative: if reload cannot do this, it will still use its choice. */
16572 mode = GET_MODE (x); /* NOTE(review): redundant; mode was already initialized above.  */
16573 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16574 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16576 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16578 if (class == FP_TOP_SSE_REGS)
16580 else if (class == FP_SECOND_SSE_REGS)
16581 return FP_SECOND_REG;
16583 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16589 /* If we are copying between general and FP registers, we need a memory
16590 location. The same is true for SSE and MMX registers.
16592 The macro can't work reliably when one of the CLASSES is class containing
16593 registers from multiple units (SSE, MMX, integer). We avoid this by never
16594 combining those units in single alternative in the machine description.
16595 Ensure that this constraint holds to avoid unexpected surprises.
16597 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16598 enforce these sanity checks. */
/* Returns nonzero when a move between CLASS1 and CLASS2 in MODE must go
   through memory.  NOTE(review): several return statements are elided
   in this listing.  */
16601 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16602 enum machine_mode mode, int strict)
/* Sanity check: mixed-unit classes are not expected here (see comment
   above); under STRICT this is a hard assertion failure.  */
16604 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16605 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16606 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16607 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16608 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16609 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16611 gcc_assert (!strict);
16615 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16618 /* ??? This is a lie. We do have moves between mmx/general, and for
16619 mmx/sse2. But by saying we need secondary memory we discourage the
16620 register allocator from using the mmx registers unless needed. */
16621 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16624 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16626 /* SSE1 doesn't have any direct moves from other classes. */
16630 /* If the target says that inter-unit moves are more expensive
16631 than moving through memory, then don't generate them. */
16632 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16635 /* Between SSE and general, we have moves no larger than word size. */
16636 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16639 /* ??? For the cost of one register reformat penalty, we could use
16640 the same instructions to move SFmode and DFmode data, but the
16641 relevant move patterns don't support those alternatives. */
16642 if (mode == SFmode || mode == DFmode)
16649 /* Return true if the registers in CLASS cannot represent the change from
16650 modes FROM to TO. */
/* Implements CANNOT_CHANGE_MODE_CLASS; used by reload to decide whether
   a subreg mode-change is representable in CLASS.  NOTE(review):
   interior lines (returns/braces) are elided in this listing.  */
16653 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16654 enum reg_class class)
16659 /* x87 registers can't do subreg at all, as all values are reformatted
16660 to extended precision. */
16661 if (MAYBE_FLOAT_CLASS_P (class))
16664 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16666 /* Vector registers do not support QI or HImode loads. If we don't
16667 disallow a change to these modes, reload will assume it's ok to
16668 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16669 the vec_dupv4hi pattern. */
16670 if (GET_MODE_SIZE (from) < 4)
16673 /* Vector registers do not support subreg with nonzero offsets, which
16674 are otherwise valid for integer registers. Since we can't see
16675 whether we have a nonzero offset from here, prohibit all
16676 nonparadoxical subregs changing size. */
16677 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16684 /* Return the cost of moving data from a register in class CLASS1 to
16685 one in class CLASS2.
16687 It is not required that the cost always equal 2 when FROM is the same as TO;
16688 on some machines it is expensive to move between registers if they are not
16689 general registers. */
/* Implements REGISTER_MOVE_COST using the tuning tables in ix86_cost.
   NOTE(review): interior lines (cost variable declaration, returns)
   are elided in this listing.  */
16692 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16693 enum reg_class class2)
16695 /* In case we require secondary memory, compute cost of the store followed
16696 by load. In order to avoid bad register allocation choices, we need
16697 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* STRICT == 0: skip the sanity assertions inside the helper.  */
16699 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16703 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16704 MEMORY_MOVE_COST (mode, class1, 1));
16705 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16706 MEMORY_MOVE_COST (mode, class2, 1));
16708 /* In case of copying from general_purpose_register we may emit multiple
16709 stores followed by single load causing memory size mismatch stall.
16710 Count this as arbitrarily high cost of 20. */
16711 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16714 /* In the case of FP/MMX moves, the registers actually overlap, and we
16715 have to switch modes in order to treat them differently. */
16716 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16717 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16723 /* Moves between SSE/MMX and integer unit are expensive. */
16724 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16725 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16726 return ix86_cost->mmxsse_to_integer;
16727 if (MAYBE_FLOAT_CLASS_P (class1))
16728 return ix86_cost->fp_move;
16729 if (MAYBE_SSE_CLASS_P (class1))
16730 return ix86_cost->sse_move;
16731 if (MAYBE_MMX_CLASS_P (class1))
16732 return ix86_cost->mmx_move;
16736 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implements HARD_REGNO_MODE_OK: per-register-bank validity checks
   (flags, x87, SSE, MMX, then general purpose registers).
   NOTE(review): several return statements are elided in this listing.  */
16739 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16741 /* Flags and only flags can only hold CCmode values. */
16742 if (CC_REGNO_P (regno))
16743 return GET_MODE_CLASS (mode) == MODE_CC;
16744 if (GET_MODE_CLASS (mode) == MODE_CC
16745 || GET_MODE_CLASS (mode) == MODE_RANDOM
16746 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16748 if (FP_REGNO_P (regno))
16749 return VALID_FP_MODE_P (mode);
16750 if (SSE_REGNO_P (regno))
16752 /* We implement the move patterns for all vector modes into and
16753 out of SSE registers, even when no operation instructions
16755 return (VALID_SSE_REG_MODE (mode)
16756 || VALID_SSE2_REG_MODE (mode)
16757 || VALID_MMX_REG_MODE (mode)
16758 || VALID_MMX_REG_MODE_3DNOW (mode));
16760 if (MMX_REGNO_P (regno))
16762 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16763 so if the register is available at all, then we can move data of
16764 the given mode into or out of it. */
16765 return (VALID_MMX_REG_MODE (mode)
16766 || VALID_MMX_REG_MODE_3DNOW (mode));
16769 if (mode == QImode)
16771 /* Take care for QImode values - they can be in non-QI regs,
16772 but then they do cause partial register stalls. */
/* regno < 4: AL/BL/CL/DL have true 8-bit subregisters on ia32;
   on 64-bit every GPR has a byte subregister (with REX).  */
16773 if (regno < 4 || TARGET_64BIT)
16775 if (!TARGET_PARTIAL_REG_STALL)
16777 return reload_in_progress || reload_completed;
16779 /* We handle both integer and floats in the general purpose registers. */
16780 else if (VALID_INT_MODE_P (mode))
16782 else if (VALID_FP_MODE_P (mode))
16784 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16785 on to use that value in smaller contexts, this can easily force a
16786 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16787 supporting DImode, allow it. */
16788 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16794 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16795 tieable integer mode. */
/* NOTE(review): the switch over MODE is elided in this listing; only
   two of its return expressions are visible below.  */
16798 ix86_tieable_integer_mode_p (enum machine_mode mode)
16807 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16810 return TARGET_64BIT;
16817 /* Return true if MODE1 is accessible in a register that can hold MODE2
16818 without copying. That is, all register classes that can hold MODE2
16819 can also hold MODE1. */
/* Implements MODES_TIEABLE_P.  NOTE(review): the return statements for
   the early cases are elided in this listing.  */
16822 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16824 if (mode1 == mode2)
16827 if (ix86_tieable_integer_mode_p (mode1)
16828 && ix86_tieable_integer_mode_p (mode2))
16831 /* MODE2 being XFmode implies fp stack or general regs, which means we
16832 can tie any smaller floating point modes to it. Note that we do not
16833 tie this with TFmode. */
16834 if (mode2 == XFmode)
16835 return mode1 == SFmode || mode1 == DFmode;
16837 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16838 that we can tie it with SFmode. */
16839 if (mode2 == DFmode)
16840 return mode1 == SFmode;
16842 /* If MODE2 is only appropriate for an SSE register, then tie with
16843 any other mode acceptable to SSE registers. */
16844 if (GET_MODE_SIZE (mode2) >= 8
16845 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16846 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16848 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16849 with any other mode acceptable to MMX registers. */
16850 if (GET_MODE_SIZE (mode2) == 8
16851 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16852 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16857 /* Return the cost of moving data of mode M between a
16858 register and memory. A value of 2 is the default; this cost is
16859 relative to those in `REGISTER_MOVE_COST'.
16861 If moving between registers and memory is more expensive than
16862 between two registers, you should define this macro to express the
16865 Model also increased moving costs of QImode registers in non
/* Implements MEMORY_MOVE_COST via the ix86_cost load/store tables,
   indexed per register bank (x87, SSE, MMX, integer).  IN selects
   load (nonzero) vs store cost.  NOTE(review): the switch cases that
   compute `index` are elided in this listing.  */
16869 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16871 if (FLOAT_CLASS_P (class))
16888 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16890 if (SSE_CLASS_P (class))
16893 switch (GET_MODE_SIZE (mode))
16907 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16909 if (MMX_CLASS_P (class))
16912 switch (GET_MODE_SIZE (mode))
16923 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16925 switch (GET_MODE_SIZE (mode))
/* QImode: non-Q_REGS byte loads are modelled as movzbl; byte stores
   outside Q_REGS get an arbitrary +4 penalty.  */
16929 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16930 : ix86_cost->movzbl_load);
16932 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16933 : ix86_cost->int_store[0] + 4);
16936 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16938 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16939 if (mode == TFmode)
16941 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16942 * (((int) GET_MODE_SIZE (mode)
16943 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16947 /* Compute a (partial) cost for rtx X. Return true if the complete
16948 cost has been computed, and false if subexpressions should be
16949 scanned. In either case, *TOTAL contains the cost result. */
/* Implements TARGET_RTX_COSTS: a large switch over CODE consulting the
   per-processor tuning tables in ix86_cost.  NOTE(review): this listing
   elides many lines (the switch labels, breaks and some returns), so
   only a subset of each case body is visible.  */
16952 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16954 enum machine_mode mode = GET_MODE (x);
16962 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16964 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16966 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): the `!` in `!GET_CODE (x)` below looks unintended —
   it negates the enum before the comparison, making the first conjunct
   nearly always true.  Later GCC releases dropped the `!`; confirm
   against upstream before changing.  */
16968 || (!GET_CODE (x) != LABEL_REF
16969 && (GET_CODE (x) != SYMBOL_REF
16970 || !SYMBOL_REF_LOCAL_P (x)))))
16977 if (mode == VOIDmode)
16980 switch (standard_80387_constant_p (x))
16985 default: /* Other constants */
16990 /* Start with (MEM (SYMBOL_REF)), since that's where
16991 it'll probably end up. Add a penalty for size. */
16992 *total = (COSTS_N_INSNS (1)
16993 + (flag_pic != 0 && !TARGET_64BIT)
16994 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17000 /* The zero extensions is often completely free on x86_64, so make
17001 it as cheap as possible. */
17002 if (TARGET_64BIT && mode == DImode
17003 && GET_MODE (XEXP (x, 0)) == SImode)
17005 else if (TARGET_ZERO_EXTEND_WITH_AND)
17006 *total = ix86_cost->add;
17008 *total = ix86_cost->movzx;
17012 *total = ix86_cost->movsx;
/* Shift costs: constant shifts of 1 fold to add; shifts by 2 or 3 may
   be done with lea when that is cheaper.  */
17016 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17017 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17019 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17022 *total = ix86_cost->add;
17025 if ((value == 2 || value == 3)
17026 && ix86_cost->lea <= ix86_cost->shift_const)
17028 *total = ix86_cost->lea;
/* 32-bit DImode shifts are synthesized from two word shifts.  */
17038 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17040 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17042 if (INTVAL (XEXP (x, 1)) > 32)
17043 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17045 *total = ix86_cost->shift_const * 2;
17049 if (GET_CODE (XEXP (x, 1)) == AND)
17050 *total = ix86_cost->shift_var * 2;
17052 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17057 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17058 *total = ix86_cost->shift_const;
17060 *total = ix86_cost->shift_var;
17065 if (FLOAT_MODE_P (mode))
17067 *total = ix86_cost->fmul;
17072 rtx op0 = XEXP (x, 0);
17073 rtx op1 = XEXP (x, 1);
/* nbits = popcount of the constant multiplier, used to estimate the
   per-set-bit cost of the multiply.  */
17075 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17077 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17078 for (nbits = 0; value != 0; value &= value - 1)
17082 /* This is arbitrary. */
17085 /* Compute costs correctly for widening multiplication. */
17086 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17087 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17088 == GET_MODE_SIZE (mode))
17090 int is_mulwiden = 0;
17091 enum machine_mode inner_mode = GET_MODE (op0);
17093 if (GET_CODE (op0) == GET_CODE (op1))
17094 is_mulwiden = 1, op1 = XEXP (op1, 0);
17095 else if (GET_CODE (op1) == CONST_INT)
17097 if (GET_CODE (op0) == SIGN_EXTEND)
17098 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17101 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17105 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17108 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17109 + nbits * ix86_cost->mult_bit
17110 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17119 if (FLOAT_MODE_P (mode))
17120 *total = ix86_cost->fdiv;
17122 *total = ix86_cost->divide[MODE_INDEX (mode)];
17126 if (FLOAT_MODE_P (mode))
17127 *total = ix86_cost->fadd;
/* PLUS: recognize the address-arithmetic shapes that a single lea
   can compute (base + index*scale + displacement).  */
17128 else if (GET_MODE_CLASS (mode) == MODE_INT
17129 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17131 if (GET_CODE (XEXP (x, 0)) == PLUS
17132 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17133 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17134 && CONSTANT_P (XEXP (x, 1)))
17136 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17137 if (val == 2 || val == 4 || val == 8)
17139 *total = ix86_cost->lea;
17140 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17141 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17143 *total += rtx_cost (XEXP (x, 1), outer_code);
17147 else if (GET_CODE (XEXP (x, 0)) == MULT
17148 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17150 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17151 if (val == 2 || val == 4 || val == 8)
17153 *total = ix86_cost->lea;
17154 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17155 *total += rtx_cost (XEXP (x, 1), outer_code);
17159 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17161 *total = ix86_cost->lea;
17162 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17163 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17164 *total += rtx_cost (XEXP (x, 1), outer_code);
17171 if (FLOAT_MODE_P (mode))
17173 *total = ix86_cost->fadd;
/* 32-bit DImode add/sub takes two adds; operands narrower than DImode
   cost double (the shift doubles the sub-cost).  */
17181 if (!TARGET_64BIT && mode == DImode)
17183 *total = (ix86_cost->add * 2
17184 + (rtx_cost (XEXP (x, 0), outer_code)
17185 << (GET_MODE (XEXP (x, 0)) != DImode))
17186 + (rtx_cost (XEXP (x, 1), outer_code)
17187 << (GET_MODE (XEXP (x, 1)) != DImode)));
17193 if (FLOAT_MODE_P (mode))
17195 *total = ix86_cost->fchs;
17201 if (!TARGET_64BIT && mode == DImode)
17202 *total = ix86_cost->add * 2;
17204 *total = ix86_cost->add;
/* (compare (zero_extract ... 1 ...) 0) is a single-bit test.  */
17208 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17209 && XEXP (XEXP (x, 0), 1) == const1_rtx
17210 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17211 && XEXP (x, 1) == const0_rtx)
17213 /* This kind of construct is implemented using test[bwl].
17214 Treat it as if we had an AND. */
17215 *total = (ix86_cost->add
17216 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17217 + rtx_cost (const1_rtx, outer_code))
17223 if (!TARGET_SSE_MATH
17225 || (mode == DFmode && !TARGET_SSE2))
17226 /* For standard 80387 constants, raise the cost to prevent
17227 compress_float_constant() to generate load from memory. */
17228 switch (standard_80387_constant_p (XEXP (x, 0)))
17238 *total = (x86_ext_80387_constants & TUNEMASK
17245 if (FLOAT_MODE_P (mode))
17246 *total = ix86_cost->fabs;
17250 if (FLOAT_MODE_P (mode))
17251 *total = ix86_cost->fsqrt;
17255 if (XINT (x, 1) == UNSPEC_TP)
/* Monotonically increasing counter used to generate unique local labels
   (L<n>$lz, LPC$<n>) for the Mach-O lazy-binding stubs below.  */
17266 static int current_machopic_label_num;
17268 /* Given a symbol name and its associated stub, write out the
17269 definition of the stub. */
/* Emits Darwin/Mach-O assembly for a lazy symbol stub: the stub entry
   (PIC and non-PIC flavors), the binder that jumps to
   dyld_stub_binding_helper, and the lazy pointer slot.  32-bit only.
   NOTE(review): some lines (notably the #if branches selecting PIC vs
   non-PIC output) are elided in this listing.  */
17272 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17274 unsigned int length;
17275 char *binder_name, *symbol_name, lazy_ptr_name[32];
17276 int label = ++current_machopic_label_num;
17278 /* For 64-bit we shouldn't get here. */
17279 gcc_assert (!TARGET_64BIT);
17281 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17282 symb = (*targetm.strip_name_encoding) (symb);
17284 length = strlen (stub);
17285 binder_name = alloca (length + 32);
17286 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17288 length = strlen (symb);
17289 symbol_name = alloca (length + 32);
17290 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17292 sprintf (lazy_ptr_name, "L%d$lz", label);
17295 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17297 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17299 fprintf (file, "%s:\n", stub);
17300 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: materialize the PC in %eax, load the lazy pointer
   relative to it, and jump through %edx.  */
17304 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17305 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17306 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC stub: jump through the lazy pointer directly.  */
17309 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17311 fprintf (file, "%s:\n", binder_name);
17315 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17316 fprintf (file, "\tpushl\t%%eax\n");
17319 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17321 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer slot: initially points at the binder; dyld rewrites it
   to the resolved symbol on first call.  */
17323 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17324 fprintf (file, "%s:\n", lazy_ptr_name);
17325 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17326 fprintf (file, "\t.long %s\n", binder_name);
/* Darwin end-of-file hook: delegates to the generic darwin_file_end.
   NOTE(review): the return-type line and any additional statements are
   elided in this listing.  */
17330 darwin_x86_file_end (void)
17332 darwin_file_end ();
17335 #endif /* TARGET_MACHO */
17337 /* Order the registers for register allocator. */
/* Fills reg_alloc_order: call-clobbered GPRs first, then call-saved
   GPRs, then either x87-before-SSE or SSE-before-x87 depending on
   TARGET_SSE_MATH, then MMX, padding the remainder with register 0.  */
17340 x86_order_regs_for_local_alloc (void)
17345 /* First allocate the local general purpose registers. */
17346 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17347 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17348 reg_alloc_order [pos++] = i;
17350 /* Global general purpose registers. */
17351 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17352 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17353 reg_alloc_order [pos++] = i;
17355 /* x87 registers come first in case we are doing FP math
17357 if (!TARGET_SSE_MATH)
17358 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17359 reg_alloc_order [pos++] = i;
17361 /* SSE registers. */
17362 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17363 reg_alloc_order [pos++] = i;
17364 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17365 reg_alloc_order [pos++] = i;
17367 /* x87 registers. */
17368 if (TARGET_SSE_MATH)
17369 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17370 reg_alloc_order [pos++] = i;
17372 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17373 reg_alloc_order [pos++] = i;
17375 /* Initialize the rest of array as we do not allocate some registers
17377 while (pos < FIRST_PSEUDO_REGISTER)
17378 reg_alloc_order [pos++] = 0;
17381 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17382 struct attribute_spec.handler. */
/* Attribute handler for "ms_struct" / "gcc_struct" (see
   struct attribute_spec.handler).  Ignores the attribute (setting
   *NO_ADD_ATTRS and warning) when it is applied to something that is
   not a RECORD_TYPE/UNION_TYPE, or when the opposite layout attribute
   is already present on the type.
   NOTE(review): extraction gaps dropped the return type, braces and at
   least one intermediate branch (numbers jump 17392 -> 17397); lines
   below are kept verbatim.  */
17384 ix86_handle_struct_attribute (tree *node, tree name,
17385 tree args ATTRIBUTE_UNUSED,
17386 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17389 if (DECL_P (*node))
17391 if (TREE_CODE (*node) == TYPE_DECL)
17392 type = &TREE_TYPE (*node);
/* Only struct and union types may carry these layout attributes.  */
17397 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17398 || TREE_CODE (*type) == UNION_TYPE)))
17400 warning (OPT_Wattributes, "%qs attribute ignored",
17401 IDENTIFIER_POINTER (name));
17402 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
17405 else if ((is_attribute_p ("ms_struct", name)
17406 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17407 || ((is_attribute_p ("gcc_struct", name)
17408 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17410 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17411 IDENTIFIER_POINTER (name));
17412 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use the MS bitfield layout rules:
   either the target default requests it (and "gcc_struct" does not
   override), or the type itself carries "ms_struct".
   NOTE(review): return type and braces dropped by extraction.  */
17419 ix86_ms_bitfield_layout_p (tree record_type)
17421 return (TARGET_MS_BITFIELD_LAYOUT &&
17422 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17423 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17426 /* Returns an expression indicating where the this parameter is
17427 located on entry to the FUNCTION. */
/* Return an RTX for where the `this' pointer of FUNCTION lives on
   entry: a DImode argument register in 64-bit mode, a SImode register
   for regparm/fastcall calling conventions, otherwise the appropriate
   stack slot (offset 8 when a hidden aggregate-return pointer occupies
   the first slot, else offset 4).
   NOTE(review): extraction gaps dropped the TARGET_64BIT test, the
   regno computation and several braces; lines below are verbatim.  */
17430 x86_this_parameter (tree function)
17432 tree type = TREE_TYPE (function);
/* 64-bit: skip the hidden return-pointer register if the function
   returns its value in memory.  */
17436 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17437 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17440 if (ix86_function_regparm (type, function) > 0)
17444 parm = TYPE_ARG_TYPES (type);
17445 /* Figure out whether or not the function has a variable number of
17447 for (; parm; parm = TREE_CHAIN (parm))
17448 if (TREE_VALUE (parm) == void_type_node)
17450 /* If not, the this parameter is in the first argument. */
17454 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17456 return gen_rtx_REG (SImode, regno);
/* Fallback: `this' is on the stack.  */
17460 if (aggregate_value_p (TREE_TYPE (type), type))
17461 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17463 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17466 /* Determine whether x86_output_mi_thunk can succeed. */
/* Predicate for the MI-thunk hook: report whether x86_output_mi_thunk
   can emit this thunk.  64-bit always can; 32-bit needs a scratch
   register free of regparm arguments for VCALL_OFFSET and for PIC GOT
   references.  NOTE(review): the actual return statements between the
   visible conditions were dropped by extraction.  */
17469 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17470 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17471 HOST_WIDE_INT vcall_offset, tree function)
17473 /* 64-bit can handle anything. */
17477 /* For 32-bit, everything's fine if we have one free register. */
17478 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17481 /* Need a free register for vcall_offset. */
17485 /* Need a free register for GOT references. */
17486 if (flag_pic && !(*targetm.binds_local_p) (function))
17489 /* Otherwise ok. */
17493 /* Output the assembler code for a thunk function. THUNK_DECL is the
17494 declaration for the thunk function itself, FUNCTION is the decl for
17495 the target function. DELTA is an immediate constant offset to be
17496 added to THIS. If VCALL_OFFSET is nonzero, the word at
17497 *(*this + vcall_offset) should be added to THIS. */
/* Emit the assembly for a C++ MI thunk: adjust the incoming `this'
   by DELTA (and, if VCALL_OFFSET, by the value found in the vtable),
   then tail-jump to FUNCTION — directly, via GOTPCREL (x86-64 PIC),
   via a Darwin stub, or via a freshly-loaded GOT (ia32 PIC).
   NOTE(review): extraction gaps dropped the TARGET_64BIT tests,
   braces and several else-arms that select between the {q} and {l}
   insn variants; code lines below are kept verbatim.  */
17500 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17501 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17502 HOST_WIDE_INT vcall_offset, tree function)
17505 rtx this = x86_this_parameter (function);
17508 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17509 pull it in now and let DELTA benefit. */
17512 else if (vcall_offset)
17514 /* Put the this parameter into %eax. */
17516 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17517 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17520 this_reg = NULL_RTX;
17522 /* Adjust the this parameter by a fixed constant. */
17525 xops[0] = GEN_INT (delta);
17526 xops[1] = this_reg ? this_reg : this;
/* On x86-64 a DELTA too wide for an immediate goes through R10.  */
17529 if (!x86_64_general_operand (xops[0], DImode))
17531 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17533 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17537 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17540 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17543 /* Adjust the this parameter by a value stored in the vtable. */
17547 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
/* ia32: pick a scratch not used for argument passing — ECX normally,
   EAX for fastcall (which claims ECX).  */
17550 int tmp_regno = 2 /* ECX */;
17551 if (lookup_attribute ("fastcall",
17552 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17553 tmp_regno = 0 /* EAX */;
17554 tmp = gen_rtx_REG (SImode, tmp_regno);
17557 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17560 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17562 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17564 /* Adjust the this parameter. */
17565 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17566 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17568 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17569 xops[0] = GEN_INT (vcall_offset);
17571 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17572 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17574 xops[1] = this_reg;
17576 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17578 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17581 /* If necessary, drop THIS back to its stack slot. */
17582 if (this_reg && this_reg != this)
17584 xops[0] = this_reg;
17586 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, branch to the real target function.  */
17589 xops[0] = XEXP (DECL_RTL (function), 0);
17592 if (!flag_pic || (*targetm.binds_local_p) (function))
17593 output_asm_insn ("jmp\t%P0", xops);
17596 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17597 tmp = gen_rtx_CONST (Pmode, tmp);
17598 tmp = gen_rtx_MEM (QImode, tmp);
17600 output_asm_insn ("jmp\t%A0", xops);
17605 if (!flag_pic || (*targetm.binds_local_p) (function))
17606 output_asm_insn ("jmp\t%P0", xops);
/* Darwin: jump through the machopic indirection stub.  */
17611 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17612 tmp = (gen_rtx_SYMBOL_REF
17614 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17615 tmp = gen_rtx_MEM (QImode, tmp);
17617 output_asm_insn ("jmp\t%0", xops);
17620 #endif /* TARGET_MACHO */
/* ia32 PIC: materialize the GOT pointer in ECX, then jump through
   the function's GOT entry.  */
17622 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17623 output_set_got (tmp, NULL_RTX);
17626 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17627 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: generic prologue, Darwin-specific
   prologue where applicable, then optional .version / __fltused /
   .intel_syntax directives controlled by target macros and the
   selected assembler dialect.
   NOTE(review): return type, braces and the #if guarding the Darwin
   call were dropped by extraction.  */
17633 x86_file_start (void)
17635 default_file_start ();
17637 darwin_file_start ();
17639 if (X86_FILE_START_VERSION_DIRECTIVE)
17640 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17641 if (X86_FILE_START_FLTUSED)
17642 fputs ("\t.global\t__fltused\n", asm_out_file);
17643 if (ix86_asm_dialect == ASM_INTEL)
17644 fputs ("\t.intel_syntax\n", asm_out_file);
/* Compute the alignment for FIELD given the COMPUTED default: on
   32-bit targets without -malign-double, cap DFmode/DCmode and
   integer-class fields at 32 bits (the traditional ia32 ABI).
   NOTE(review): return type, braces and the final return of
   `computed' were dropped by extraction; lines below are verbatim.  */
17648 x86_field_alignment (tree field, int computed)
17650 enum machine_mode mode;
17651 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural alignment.  */
17653 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17655 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17656 ? get_inner_array_type (type) : type);
17657 if (mode == DFmode || mode == DCmode
17658 || GET_MODE_CLASS (mode) == MODE_INT
17659 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17660 return MIN (32, computed);
17664 /* Output assembler code to FILE to increment profiler label # LABELNO
17665 for profiling a function entry. */
/* Emit the profiler (mcount) call sequence for function entry.
   Four variants are visible: 64-bit PIC, 64-bit non-PIC, 32-bit PIC
   and 32-bit non-PIC; each optionally loads a per-function counter
   label first unless NO_PROFILE_COUNTERS is defined.
   NOTE(review): the `if (TARGET_64BIT)' / `else if (flag_pic)'
   selectors and the matching #endif lines between the variants were
   dropped by extraction; lines below are verbatim.  */
17667 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17672 #ifndef NO_PROFILE_COUNTERS
17673 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17675 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17679 #ifndef NO_PROFILE_COUNTERS
17680 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17682 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17686 #ifndef NO_PROFILE_COUNTERS
17687 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17688 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17690 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17694 #ifndef NO_PROFILE_COUNTERS
17695 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17696 PROFILE_COUNT_REGISTER);
17698 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17702 /* We don't have exact information about the insn sizes, but we may assume
17703 quite safely that we are informed about all 1 byte insns and memory
17704 address sizes. This is enough to eliminate unnecessary padding in
/* Conservatively estimate the minimum encoded size of INSN in bytes.
   Used by the K8 jump-padding pass below; only needs to be a safe
   lower bound.  Non-active insns and alignment unspecs count as 0;
   dispatch tables as 0 (emitted as data); direct calls as 5;
   otherwise the size is derived from length/address attributes.
   NOTE(review): the actual `return N;' statements between the
   visible conditions were dropped by extraction.  */
17708 min_insn_size (rtx insn)
17712 if (!INSN_P (insn) || !active_insn_p (insn))
17715 /* Discard alignments we've emit and jump instructions. */
17716 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17717 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17719 if (GET_CODE (insn) == JUMP_INSN
17720 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17721 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17724 /* Important case - calls are always 5 bytes.
17725 It is common to have many calls in the row. */
17726 if (GET_CODE (insn) == CALL_INSN
17727 && symbolic_reference_mentioned_p (PATTERN (insn))
17728 && !SIBLING_CALL_P (insn))
17730 if (get_attr_length (insn) <= 1)
17733 /* For normal instructions we may rely on the sizes of addresses
17734 and the presence of symbol to require 4 bytes of encoding.
17735 This is not the case for jumps where references are PC relative. */
17736 if (GET_CODE (insn) != JUMP_INSN)
17738 l = get_attr_length_address (insn);
17739 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17748 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* K8 mispredicts when more than 3 jumps land in one 16-byte window.
   Scan the insn stream with a sliding window [START, INSN], counting
   jump/call insns and estimated bytes; when a window with 4 jumps
   could fit in under 16 bytes, emit an alignment insn before the last
   jump to pad it out of the window.
   NOTE(review): extraction gaps dropped the declarations, several
   braces and the njumps++ bookkeeping inside the first condition;
   lines below are kept verbatim.  */
17752 ix86_avoid_jump_misspredicts (void)
17754 rtx insn, start = get_insns ();
17755 int nbytes = 0, njumps = 0;
17758 /* Look for all minimal intervals of instructions containing 4 jumps.
17759 The intervals are bounded by START and INSN. NBYTES is the total
17760 size of instructions in the interval including INSN and not including
17761 START. When the NBYTES is smaller than 16 bytes, it is possible
17762 that the end of START and INSN ends up in the same 16byte page.
17764 The smallest offset in the page INSN can start is the case where START
17765 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17766 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17768 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17771 nbytes += min_insn_size (insn);
17773 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17774 INSN_UID (insn), min_insn_size (insn));
17775 if ((GET_CODE (insn) == JUMP_INSN
17776 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17777 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17778 || GET_CODE (insn) == CALL_INSN)
/* Shrink the window from the front until it holds at most 4 jumps.  */
17785 start = NEXT_INSN (start);
17786 if ((GET_CODE (start) == JUMP_INSN
17787 && GET_CODE (PATTERN (start)) != ADDR_VEC
17788 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17789 || GET_CODE (start) == CALL_INSN)
17790 njumps--, isjump = 1;
17793 nbytes -= min_insn_size (start);
17795 gcc_assert (njumps >= 0);
17797 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17798 INSN_UID (start), INSN_UID (insn), nbytes);
/* 3 prior jumps + INSN = 4 jumps potentially in one 16-byte page.  */
17800 if (njumps == 3 && isjump && nbytes < 16)
17802 int padsize = 15 - nbytes + min_insn_size (insn);
17805 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17806 INSN_UID (insn), padsize);
17807 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17812 /* AMD Athlon works faster
17813 when RET is not destination of conditional jump or directly preceded
17814 by other jump instruction. We avoid the penalty by inserting NOP just
17815 before the RET instructions in such cases. */
/* Athlon/K8 penalty avoidance: a RET that is the target of a
   conditional jump, or that directly follows another jump/call, is
   mispredicted.  Walk the predecessors of the exit block and replace
   such hot RETs with the long-form return (effectively REP;RET /
   NOP;RET padding).
   NOTE(review): extraction gaps dropped the declarations, braces,
   `replace = true' assignments and the trailing delete of the old
   return; lines below are kept verbatim.  */
17817 ix86_pad_returns (void)
17822 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17824 basic_block bb = e->src;
17825 rtx ret = BB_END (bb);
17827 bool replace = false;
/* Only plain RETURN jumps in hot blocks are worth padding.  */
17829 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17830 || !maybe_hot_bb_p (bb))
17832 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17833 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* A label right before the RET: check whether any predecessor
   reaches it by a (non-fallthru) jump.  */
17835 if (prev && GET_CODE (prev) == CODE_LABEL)
17840 FOR_EACH_EDGE (e, ei, bb->preds)
17841 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17842 && !(e->flags & EDGE_FALLTHRU))
17847 prev = prev_active_insn (ret);
17849 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17850 || GET_CODE (prev) == CALL_INSN))
17852 /* Empty functions get branch mispredict even when the jump destination
17853 is not visible to us. */
17854 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17859 emit_insn_before (gen_return_internal_long (), ret);
17865 /* Implement machine specific optimizations. We implement padding of returns
17866 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* Machine-dependent reorg pass body (presumably ix86_reorg — the
   function header was dropped by extraction; TODO confirm against the
   full file): run return padding and the 4-jumps-per-16-bytes
   avoidance when optimizing for speed on targets that want them.  */
17870 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17871 ix86_pad_returns ();
17872 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17873 ix86_avoid_jump_misspredicts ();
17876 /* Return nonzero when QImode register that must be represented via REX prefix
/* Return nonzero when INSN uses a QImode register whose encoding
   requires a REX prefix (operand regno >= 4, i.e. not AL/BL/CL/DL).
   NOTE(review): the QImode-operand mode check and the return
   statements were dropped by extraction.  */
17879 x86_extended_QIreg_mentioned_p (rtx insn)
17882 extract_insn_cached (insn);
17883 for (i = 0; i < recog_data.n_operands; i++)
17884 if (REG_P (recog_data.operand[i])
17885 && REGNO (recog_data.operand[i]) >= 4)
17890 /* Return nonzero when P points to register encoded via REX prefix.
17891 Called via for_each_rtx. */
/* for_each_rtx callback: nonzero when *P is a hard register that
   needs a REX prefix (R8-R15 or XMM8-XMM15).
   NOTE(review): the REG_P guard/early-return between the declaration
   and the REGNO read was dropped by extraction.  */
17893 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17895 unsigned int regno;
17898 regno = REGNO (*p);
17899 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17902 /* Return true when INSN mentions register that must be encoded using REX
/* Return true when INSN's pattern mentions any REX-encoded register;
   walks the whole pattern via extended_reg_mentioned_1.  */
17905 x86_extended_reg_mentioned_p (rtx insn)
17907 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17910 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17911 optabs would emit if we didn't have TFmode patterns. */
/* Expand an unsigned SImode/DImode -> FP conversion without unsigned
   FP support: if the input is non-negative, use the plain signed
   FLOAT; otherwise halve it (shift right 1, OR in the dropped low
   bit to keep rounding correct), convert, and double the result.
   NOTE(review): the assignment of `out' from operands[0] and some
   braces were dropped by extraction; lines below are verbatim.  */
17914 x86_emit_floatuns (rtx operands[2])
17916 rtx neglab, donelab, i0, i1, f0, in, out;
17917 enum machine_mode mode, inmode;
17919 inmode = GET_MODE (operands[1]);
17920 gcc_assert (inmode == SImode || inmode == DImode);
17923 in = force_reg (inmode, operands[1]);
17924 mode = GET_MODE (out);
17925 neglab = gen_label_rtx ();
17926 donelab = gen_label_rtx ();
17927 i1 = gen_reg_rtx (Pmode);
17928 f0 = gen_reg_rtx (mode);
/* Fast path: value fits in the signed range.  */
17930 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17932 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17933 emit_jump_insn (gen_jump (donelab));
17936 emit_label (neglab);
/* Slow path: (in >> 1) | (in & 1), convert, then double.  */
17938 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17939 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17940 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17941 expand_float (f0, i0, 0);
17942 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17944 emit_label (donelab);
17947 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17948 with all elements equal to VAR. Return true if successful. */
/* ix86_expand_vector_init subroutine: set TARGET to a vector whose
   elements all equal VAL; return true on success.  Strategies visible
   below: direct VEC_DUPLICATE for natively-supported modes; pshufw
   via TRUNCATE for V4HI on SSE/3DNow!A; SSE2 punpckl+pshufd ladders
   for V8HI and V16QI; and a generic widen-by-(shift|or)-and-recurse
   fallback.  NOTE(review): this block is a switch over modes whose
   `case' labels, braces and several early branches were dropped by
   extraction (large gaps in the embedded numbering); code lines are
   kept verbatim.  */
17951 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17952 rtx target, rtx val)
17954 enum machine_mode smode, wsmode, wvmode;
/* Simple case: the mode supports VEC_DUPLICATE directly.  */
17969 val = force_reg (GET_MODE_INNER (mode), val);
17970 x = gen_rtx_VEC_DUPLICATE (mode, val);
17971 emit_insn (gen_rtx_SET (VOIDmode, target, x))
17977 if (TARGET_SSE || TARGET_3DNOW_A)
17979 val = gen_lowpart (SImode, val);
17980 x = gen_rtx_TRUNCATE (HImode, val);
17981 x = gen_rtx_VEC_DUPLICATE (mode, x);
17982 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18004 /* Extend HImode to SImode using a paradoxical SUBREG. */
18005 tmp1 = gen_reg_rtx (SImode);
18006 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18007 /* Insert the SImode value as low element of V4SImode vector. */
18008 tmp2 = gen_reg_rtx (V4SImode);
18009 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18010 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18011 CONST0_RTX (V4SImode),
18013 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18014 /* Cast the V4SImode vector back to a V8HImode vector. */
18015 tmp1 = gen_reg_rtx (V8HImode);
18016 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18017 /* Duplicate the low short through the whole low SImode word. */
18018 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18019 /* Cast the V8HImode vector back to a V4SImode vector. */
18020 tmp2 = gen_reg_rtx (V4SImode);
18021 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18022 /* Replicate the low element of the V4SImode vector. */
18023 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18024 /* Cast the V2SImode back to V8HImode, and store in target. */
18025 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18036 /* Extend QImode to SImode using a paradoxical SUBREG. */
18037 tmp1 = gen_reg_rtx (SImode);
18038 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18039 /* Insert the SImode value as low element of V4SImode vector. */
18040 tmp2 = gen_reg_rtx (V4SImode);
18041 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18042 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18043 CONST0_RTX (V4SImode),
18045 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18046 /* Cast the V4SImode vector back to a V16QImode vector. */
18047 tmp1 = gen_reg_rtx (V16QImode);
18048 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18049 /* Duplicate the low byte through the whole low SImode word. */
18050 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18051 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18052 /* Cast the V16QImode vector back to a V4SImode vector. */
18053 tmp2 = gen_reg_rtx (V4SImode);
18054 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18055 /* Replicate the low element of the V4SImode vector. */
18056 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18057 /* Cast the V2SImode back to V16QImode, and store in target. */
18058 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18066 /* Replicate the value once into the next wider mode and recurse. */
18067 val = convert_modes (wsmode, smode, val, true);
18068 x = expand_simple_binop (wsmode, ASHIFT, val,
18069 GEN_INT (GET_MODE_BITSIZE (smode)),
18070 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18071 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18073 x = gen_reg_rtx (wvmode);
18074 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18075 gcc_unreachable ();
18076 emit_move_insn (target, gen_lowpart (mode, x));
18084 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18085 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* ix86_expand_vector_init subroutine: set TARGET so element ONE_VAR
   is VAR and every other element is zero; return true on success.
   Visible strategies: VEC_CONCAT with zero for 2-element modes;
   VEC_MERGE of a duplicate with the zero vector, followed by a
   pshufd / shufps shuffle to move the value into position; and a
   zero-extend-to-SImode-and-recurse fallback for narrow elements.
   NOTE(review): the mode switch's `case' labels and several braces
   were dropped by extraction; code lines below are verbatim.  */
18089 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18090 rtx target, rtx var, int one_var)
18092 enum machine_mode vsimode;
18108 var = force_reg (GET_MODE_INNER (mode), var);
18109 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18110 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Build <var, 0, 0, 0> in a fresh pseudo if TARGET is not a
   pseudo-register we can scribble on.  */
18115 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18116 new_target = gen_reg_rtx (mode);
18118 new_target = target;
18119 var = force_reg (GET_MODE_INNER (mode), var);
18120 x = gen_rtx_VEC_DUPLICATE (mode, var);
18121 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18122 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18125 /* We need to shuffle the value to the correct position, so
18126 create a new pseudo to store the intermediate result. */
18128 /* With SSE2, we can use the integer shuffle insns. */
18129 if (mode != V4SFmode && TARGET_SSE2)
18131 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18133 GEN_INT (one_var == 1 ? 0 : 1),
18134 GEN_INT (one_var == 2 ? 0 : 1),
18135 GEN_INT (one_var == 3 ? 0 : 1)));
18136 if (target != new_target)
18137 emit_move_insn (target, new_target);
18141 /* Otherwise convert the intermediate result to V4SFmode and
18142 use the SSE1 shuffle instructions. */
18143 if (mode != V4SFmode)
18145 tmp = gen_reg_rtx (V4SFmode);
18146 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18151 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18153 GEN_INT (one_var == 1 ? 0 : 1),
18154 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18155 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18157 if (mode != V4SFmode)
18158 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18159 else if (tmp != target)
18160 emit_move_insn (target, tmp);
18162 else if (target != new_target)
18163 emit_move_insn (target, new_target);
18168 vsimode = V4SImode;
18174 vsimode = V2SImode;
18180 /* Zero extend the variable element to SImode and recurse. */
18181 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18183 x = gen_reg_rtx (vsimode);
18184 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18186 gcc_unreachable ();
18188 emit_move_insn (target, gen_lowpart (mode, x));
18196 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18197 consisting of the values in VALS. It is known that all elements
18198 except ONE_VAR are constants. Return true if successful. */
/* ix86_expand_vector_init subroutine: all elements of VALS are
   constant except ONE_VAR.  Load the constant vector (with a zero in
   the variable slot) from the pool, then overwrite the variable slot
   via ix86_expand_vector_set.  QImode elements get special handling:
   the variable byte is merged with its neighbor constant and the
   vector is set as an HImode element instead.
   NOTE(review): the mode switch's labels and some braces were
   dropped by extraction; code lines below are verbatim.  */
18201 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18202 rtx target, rtx vals, int one_var)
18204 rtx var = XVECEXP (vals, 0, one_var);
18205 enum machine_mode wmode;
/* Constant pool copy of VALS with a zero where the variable goes.  */
18208 const_vec = copy_rtx (vals);
18209 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18210 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18218 /* For the two element vectors, it's just as easy to use
18219 the general case. */
18235 /* There's no way to set one QImode entry easily. Combine
18236 the variable value with its adjacent constant value, and
18237 promote to an HImode set. */
18238 x = XVECEXP (vals, 0, one_var ^ 1);
/* Variable byte is the high half of the HImode pair...  */
18241 var = convert_modes (HImode, QImode, var, true);
18242 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18243 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18244 x = GEN_INT (INTVAL (x) & 0xff);
/* ...or the low half, with the constant shifted high.  */
18248 var = convert_modes (HImode, QImode, var, true);
18249 x = gen_int_mode (INTVAL (x) << 8, HImode);
18251 if (x != const0_rtx)
18252 var = expand_simple_binop (HImode, IOR, var, x, var,
18253 1, OPTAB_LIB_WIDEN);
18255 x = gen_reg_rtx (wmode);
18256 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18257 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18259 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load constants, then poke the variable element.  */
18266 emit_move_insn (target, const_vec);
18267 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18271 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18272 all values variable, and none identical. */
/* ix86_expand_vector_init subroutine, fully general case: every
   element variable and distinct.  2-element modes use VEC_CONCAT;
   4-element SSE modes recurse on two halves and concatenate;
   everything else assembles word_mode chunks with shift/or and
   stitches them into TARGET through low/high-part moves (or a
   recursive V4SI build for 4 words).
   NOTE(review): the mode switch's labels, some braces and a few
   statements (e.g. the initial store of `elt' into `word') were
   dropped by extraction; code lines below are verbatim.  */
18275 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18276 rtx target, rtx vals)
18278 enum machine_mode half_mode = GET_MODE_INNER (mode);
18279 rtx op0 = NULL, op1 = NULL;
18280 bool use_vec_concat = false;
18286 if (!mmx_ok && !TARGET_SSE)
18292 /* For the two element vectors, we always implement VEC_CONCAT. */
18293 op0 = XVECEXP (vals, 0, 0);
18294 op1 = XVECEXP (vals, 0, 1);
18295 use_vec_concat = true;
18299 half_mode = V2SFmode;
18302 half_mode = V2SImode;
18308 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18309 Recurse to load the two halves. */
18311 op0 = gen_reg_rtx (half_mode);
18312 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18313 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18315 op1 = gen_reg_rtx (half_mode);
18316 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18317 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18319 use_vec_concat = true;
18330 gcc_unreachable ();
18333 if (use_vec_concat)
18335 if (!register_operand (op0, half_mode))
18336 op0 = force_reg (half_mode, op0);
18337 if (!register_operand (op1, half_mode))
18338 op1 = force_reg (half_mode, op1);
18340 emit_insn (gen_rtx_SET (VOIDmode, target,
18341 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18345 int i, j, n_elts, n_words, n_elt_per_word;
18346 enum machine_mode inner_mode;
18347 rtx words[4], shift;
18349 inner_mode = GET_MODE_INNER (mode);
18350 n_elts = GET_MODE_NUNITS (mode);
18351 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18352 n_elt_per_word = n_elts / n_words;
18353 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
/* Pack n_elt_per_word elements into each word, high element first.  */
18355 for (i = 0; i < n_words; ++i)
18357 rtx word = NULL_RTX;
18359 for (j = 0; j < n_elt_per_word; ++j)
18361 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18362 elt = convert_modes (word_mode, inner_mode, elt, true);
18368 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18369 word, 1, OPTAB_LIB_WIDEN);
18370 word = expand_simple_binop (word_mode, IOR, word, elt,
18371 word, 1, OPTAB_LIB_WIDEN);
/* Move the assembled words into TARGET.  */
18379 emit_move_insn (target, gen_lowpart (mode, words[0]));
18380 else if (n_words == 2)
18382 rtx tmp = gen_reg_rtx (mode);
18383 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18384 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18385 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18386 emit_move_insn (target, tmp);
18388 else if (n_words == 4)
18390 rtx tmp = gen_reg_rtx (V4SImode);
18391 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18392 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18393 emit_move_insn (target, gen_lowpart (mode, tmp));
18396 gcc_unreachable ();
18400 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18401 instructions unless MMX_OK is true. */
/* Entry point: initialize vector TARGET from the PARALLEL VALS,
   avoiding MMX unless MMX_OK.  Classifies the elements (how many are
   non-constant, whether all equal, whether all constant-zero) and
   dispatches, in order of preference, to: constant-pool load,
   broadcast (init_duplicate), single-nonzero (init_one_nonzero),
   single-variable (init_one_var), and the general expander.
   NOTE(review): a few guard lines (the `n_var == 0' and `all_same'
   conditions, some braces) were dropped by extraction.  */
18404 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18406 enum machine_mode mode = GET_MODE (target);
18407 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18408 int n_elts = GET_MODE_NUNITS (mode);
18409 int n_var = 0, one_var = -1;
18410 bool all_same = true, all_const_zero = true;
/* Classify the elements in one pass.  */
18414 for (i = 0; i < n_elts; ++i)
18416 x = XVECEXP (vals, 0, i);
18417 if (!CONSTANT_P (x))
18418 n_var++, one_var = i;
18419 else if (x != CONST0_RTX (inner_mode))
18420 all_const_zero = false;
18421 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18425 /* Constants are best loaded from the constant pool. */
18428 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18432 /* If all values are identical, broadcast the value. */
18434 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18435 XVECEXP (vals, 0, 0)))
18438 /* Values where only one field is non-constant are best loaded from
18439 the pool and overwritten via move later. */
18443 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18444 XVECEXP (vals, 0, one_var),
18448 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Last resort: fully general expansion.  */
18452 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  Per-mode
   strategies visible below: extract-other-half + VEC_CONCAT for
   2-element modes; unpcklps/shufps dances for V4SF; VEC_MERGE of a
   duplicate for element 0 of V4SI; pshufd swap-set-swap for other
   V4SI elements; reuse of the V4SF path for SSE1; and a final
   store-to-stack-slot fallback for anything else.
   NOTE(review): this is a large switch whose `case' labels, braces
   and some selector conditions were dropped by extraction (large
   gaps in the embedded numbering); code lines below are verbatim.  */
18456 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18458 enum machine_mode mode = GET_MODE (target);
18459 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18460 bool use_vec_merge = false;
/* 2-element case: pull out the untouched element, concat with VAL in
   the right order.  */
18469 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18470 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18472 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18474 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18475 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18485 /* For the two element vectors, we implement a VEC_CONCAT with
18486 the extraction of the other element. */
18488 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18489 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18492 op0 = val, op1 = tmp;
18494 op0 = tmp, op1 = val;
18496 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18497 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18505 use_vec_merge = true;
/* V4SF, elt == 1: the shufps patterns below are annotated with the
   element layout at each step.  */
18509 /* tmp = target = A B C D */
18510 tmp = copy_to_reg (target);
18511 /* target = A A B B */
18512 emit_insn (gen_sse_unpcklps (target, target, target));
18513 /* target = X A B B */
18514 ix86_expand_vector_set (false, target, val, 0);
18515 /* target = A X C D */
18516 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18517 GEN_INT (1), GEN_INT (0),
18518 GEN_INT (2+4), GEN_INT (3+4)));
18522 /* tmp = target = A B C D */
18523 tmp = copy_to_reg (target);
18524 /* tmp = X B C D */
18525 ix86_expand_vector_set (false, tmp, val, 0);
18526 /* target = A B X D */
18527 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18528 GEN_INT (0), GEN_INT (1),
18529 GEN_INT (0+4), GEN_INT (3+4)));
18533 /* tmp = target = A B C D */
18534 tmp = copy_to_reg (target);
18535 /* tmp = X B C D */
18536 ix86_expand_vector_set (false, tmp, val, 0);
18537 /* target = A B X D */
18538 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18539 GEN_INT (0), GEN_INT (1),
18540 GEN_INT (2+4), GEN_INT (0+4)));
18544 gcc_unreachable ();
18549 /* Element 0 handled by vec_merge below. */
18552 use_vec_merge = true;
18558 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18559 store into element 0, then shuffle them back. */
18563 order[0] = GEN_INT (elt);
18564 order[1] = const1_rtx;
18565 order[2] = const2_rtx;
18566 order[3] = GEN_INT (3);
18567 order[elt] = const0_rtx;
18569 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18570 order[1], order[2], order[3]));
18572 ix86_expand_vector_set (false, target, val, 0);
18574 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18575 order[1], order[2], order[3]));
18579 /* For SSE1, we have to reuse the V4SF code. */
18580 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18581 gen_lowpart (SFmode, val), elt);
18586 use_vec_merge = TARGET_SSE2;
18589 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
/* VEC_MERGE path: duplicate VAL, merge with a one-hot mask.  */
18600 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18601 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18602 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: bounce the vector through a stack temporary.  */
18606 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18608 emit_move_insn (mem, target);
18610 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18611 emit_move_insn (tmp, val);
18613 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Visible
   strategies: direct vec_extract where available; shufps/unpckhps
   (V4SF) or pshufd/punpckhdq (V4SI) to rotate the wanted element into
   lane 0 first; V4SF-path reuse for SSE1; VEC_SELECT with a
   zero-extend hint for HImode; and a stack-temporary fallback.
   NOTE(review): the mode switch's `case' labels, element-index
   selectors and several braces were dropped by extraction; code lines
   below are verbatim.  */
18618 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18620 enum machine_mode mode = GET_MODE (vec);
18621 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18622 bool use_vec_extr = false;
18635 use_vec_extr = true;
/* V4SF: move the wanted lane to position 0, then extract lane 0.  */
18647 tmp = gen_reg_rtx (mode);
18648 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18649 GEN_INT (elt), GEN_INT (elt),
18650 GEN_INT (elt+4), GEN_INT (elt+4)));
18654 tmp = gen_reg_rtx (mode);
18655 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18659 gcc_unreachable ();
18662 use_vec_extr = true;
/* V4SI, SSE2: same idea with integer shuffles.  */
18677 tmp = gen_reg_rtx (mode);
18678 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18679 GEN_INT (elt), GEN_INT (elt),
18680 GEN_INT (elt), GEN_INT (elt)));
18684 tmp = gen_reg_rtx (mode);
18685 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18689 gcc_unreachable ();
18692 use_vec_extr = true;
18697 /* For SSE1, we have to reuse the V4SF code. */
18698 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18699 gen_lowpart (V4SFmode, vec), elt);
18705 use_vec_extr = TARGET_SSE2;
18708 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18713 /* ??? Could extract the appropriate HImode element and shift. */
/* VEC_SELECT extraction path.  */
18720 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18721 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18723 /* Let the rtl optimizers know about the zero extension performed. */
18724 if (inner_mode == HImode)
18726 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18727 target = gen_lowpart (SImode, target);
18730 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill the vector and load one element back.  */
18734 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18736 emit_move_insn (mem, vec);
18738 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18739 emit_move_insn (target, tmp);
18743 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
18744 pattern to reduce; DEST is the destination; IN is the input vector.  */
18747 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18749 rtx tmp1, tmp2, tmp3;
18751 tmp1 = gen_reg_rtx (V4SFmode);
18752 tmp2 = gen_reg_rtx (V4SFmode);
18753 tmp3 = gen_reg_rtx (V4SFmode);
/* Log2 reduction: first fold the high pair onto the low pair...  */
18755 emit_insn (gen_sse_movhlps (tmp1, in, in));
18756 emit_insn (fn (tmp2, tmp1, in));
/* ...then broadcast element 1 of the partial result and fold it into
   element 0.  The 1+4 selectors index the second shufps operand.  */
18758 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18759 GEN_INT (1), GEN_INT (1),
18760 GEN_INT (1+4), GEN_INT (1+4)));
18761 emit_insn (fn (dest, tmp2, tmp3));
18764 /* Target hook for scalar_mode_supported_p.  */
18766 ix86_scalar_mode_supported_p (enum machine_mode mode)
/* Decimal-float modes get special treatment before deferring to the
   generic hook.  NOTE(review): the listing drops the body of this
   branch (original line numbers jump 18768 -> 18771) -- presumably it
   returns true/default_decimal_float_supported_p; confirm in full
   source.  */
18768 if (DECIMAL_FLOAT_MODE_P (mode))
18771 return default_scalar_mode_supported_p (mode);
18774 /* Implements target hook vector_mode_supported_p.  A vector mode is
supported if any enabled vector ISA (SSE, SSE2, MMX, 3DNow!) can hold
it.  NOTE(review): the listing drops the `return true;` line after
each condition and the final `return false;` (line numbers jump);
only the conditions themselves are visible here.  */
18776 ix86_vector_mode_supported_p (enum machine_mode mode)
18778 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18780 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18782 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18784 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18789 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18791 We do this in the new i386 backend to maintain source compatibility
18792 with the old cc0-based compiler.  Every asm statement implicitly
clobbers the condition-code flags, the x87 status word, and the
direction flag.  The build_string lengths (5, 4, 7) match the literal
register names.  */
18795 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18796 tree inputs ATTRIBUTE_UNUSED,
18799 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18801 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18803 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18808 /* Return true if this goes in small data/bss.  EXP is considered
"large data" only under the medium code models; explicit .ldata/.lbss
section placement or a size above ix86_section_threshold also forces
large data.  NOTE(review): several `return` lines are dropped from
this listing (line-number gaps).  */
18811 ix86_in_large_data_p (tree exp)
/* Only the medium code models distinguish large data.  */
18813 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18816 /* Functions are never large data.  */
18817 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute of ".ldata"/".lbss" means large.  */
18820 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18822 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18823 if (strcmp (section, ".ldata") == 0
18824 || strcmp (section, ".lbss") == 0)
18830 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18832 /* If this is an incomplete type with size 0, then we can't put it
18833 in data because it might be too big when completed.  */
18834 if (!size || size > ix86_section_threshold)
/* Target hook TARGET_ENCODE_SECTION_INFO: after the default encoding,
   tag variables that live in the medium-model large-data area with
   SYMBOL_FLAG_FAR_ADDR on their SYMBOL_REF so addressing code can use
   the appropriate (far) relocations.  */
18841 ix86_encode_section_info (tree decl, rtx rtl, int first)
18843 default_encode_section_info (decl, rtl, first);
18845 if (TREE_CODE (decl) == VAR_DECL
18846 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18847 && ix86_in_large_data_p (decl))
18848 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18851 /* Worker function for REVERSE_CONDITION.  FP compare modes must use
the maybe-unordered reversal, because reversing e.g. LT to GE would
silently change behavior for unordered (NaN) operands.  */
18854 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18856 return (mode != CCFPmode && mode != CCFPUmode
18857 ? reverse_condition (code)
18858 : reverse_condition_maybe_unordered (code));
18861 /* Output code to perform an x87 FP register move, from OPERANDS[1]
to OPERANDS[0].  Returns the assembler template string.  If the source
register dies in this insn we can pop it: either free the slot
(ffreep) when the destination is already %st(0), or use fstp.
NOTE(review): the non-dying / non-stack-top fallthrough return is
missing from this listing (line numbers jump 18875 -> 18879).  */
18865 output_387_reg_move (rtx insn, rtx *operands)
18867 if (REG_P (operands[1])
18868 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18870 if (REGNO (operands[0]) == FIRST_STACK_REG)
18871 return output_387_ffreep (operands, 0);
18872 return "fstp\t%y0";
18874 if (STACK_TOP_P (operands[0]))
18875 return "fld%z1\t%y1";
18879 /* Output code to perform a conditional jump to LABEL, if C2 flag in
18880 FP status register is set.  Reads the x87 status word with fnstsw,
then either transfers it to EFLAGS with sahf (when cheap on this CPU)
and tests UNORDERED, or tests the C2 bit (0x04 of the high byte)
directly with testb.  */
18883 ix86_emit_fp_unordered_jump (rtx label)
18885 rtx reg = gen_reg_rtx (HImode);
18888 emit_insn (gen_x86_fnstsw_1 (reg));
18890 if (TARGET_USE_SAHF)
/* sahf path: status word -> EFLAGS, then branch on unordered.  */
18892 emit_insn (gen_x86_sahf_1 (reg));
18894 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18895 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* test path: test the C2 bit of the high byte of the status word.  */
18899 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18901 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18902 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
/* Emit the conditional jump: (set pc (if_then_else cond label pc)).  */
18905 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18906 gen_rtx_LABEL_REF (VOIDmode, label),
18908 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18909 emit_jump_insn (temp);
18912 /* Output code to perform a log1p XFmode calculation, i.e.
OP0 = log(1 + OP1).  For small |OP1| use fyl2xp1 directly (accurate
near zero); otherwise compute 1 + OP1 explicitly and use fyl2x.  The
threshold 0.2928... is 1 - sqrt(2)/2, fyl2xp1's valid input range
boundary -- TODO confirm against the x87 manual.  Both paths multiply
by ln(2) (fldln2, standard_80387_constant_rtx (4)) to convert the
base-2 log to a natural log.  */
18914 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18916 rtx label1 = gen_label_rtx ();
18917 rtx label2 = gen_label_rtx ();
18919 rtx tmp = gen_reg_rtx (XFmode);
18920 rtx tmp2 = gen_reg_rtx (XFmode);
/* Branch to label1 (the fyl2x path) when |op1| >= threshold.  */
18922 emit_insn (gen_absxf2 (tmp, op1));
18923 emit_insn (gen_cmpxf (tmp,
18924 CONST_DOUBLE_FROM_REAL_VALUE (
18925 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18927 emit_jump_insn (gen_bge (label1));
/* Small-argument path: op0 = ln2 * log2(op1 + 1).  */
18929 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18930 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18931 emit_jump (label2);
/* Large-argument path: op0 = ln2 * log2(1 + op1), computed naively.  */
18933 emit_label (label1);
18934 emit_move_insn (tmp, CONST1_RTX (XFmode));
18935 emit_insn (gen_addxf3 (tmp, op1, tmp));
18936 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18937 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18939 emit_label (label2);
18942 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  Emits the
.section directive itself for .eh_frame (to append the @unwind
marker); everything else defers to the generic ELF handler.
NOTE(review): the condition guarding the special case is partly
missing from this listing (line numbers jump 18949 -> 18952).  */
18945 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18948 /* With Binutils 2.15, the "@unwind" marker must be specified on
18949 every occurrence of the ".eh_frame" section, not just the first
18952 && strcmp (name, ".eh_frame") == 0)
18954 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18955 flags & SECTION_WRITE ? "aw" : "a");
18958 default_elf_asm_named_section (name, flags, decl);
18961 /* Return the mangling of TYPE if it is an extended fundamental type,
or NULL if the default mangling applies.  Dispatches on TYPE_MODE.
NOTE(review): the case labels, return statements and default branch
are dropped from this listing (line-number gaps); only the comments
documenting the manglings survive.  */
18963 static const char *
18964 ix86_mangle_fundamental_type (tree type)
18966 switch (TYPE_MODE (type))
18969 /* __float128 is "g".  */
18972 /* "long double" or __float80 is "e".  */
18979 /* For 32-bit code we can save PIC register setup by using
18980 __stack_chk_fail_local hidden function instead of calling
18981 __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
18982 register, so it is better to call __stack_chk_fail directly.  */
/* Target hook TARGET_STACK_PROTECT_FAIL: build the call tree emitted
   when stack-protector canary verification fails.  */
18985 ix86_stack_protect_fail (void)
18987 return TARGET_64BIT
18988 ? default_external_stack_protect_fail ()
18989 : default_hidden_stack_protect_fail ();
18992 /* Select a format to encode pointers in exception handling data.  CODE
18993 is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
18994 true if the symbol may be affected by dynamic relocations.
18996 ??? All x86 object file formats are capable of representing this.
18997 After all, the relocation needed is the same as for the call insn.
18998 Whether or not a particular assembler allows us to enter such, I
18999 guess we'll have to see.  */
19001 asm_preferred_eh_data_format (int code, int global)
/* PIC path: pc-relative, indirect for global symbols; sdata4 when the
   code model guarantees 32-bit reach, else sdata8.  NOTE(review): the
   enclosing `if (flag_pic ...)` line is missing from this listing
   (line numbers jump 19001 -> 19005).  */
19005 int type = DW_EH_PE_sdata8;
19007 || ix86_cmodel == CM_SMALL_PIC
19008 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))
19009 type = DW_EH_PE_sdata4;
19010 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, shrunk to udata4 when the code model
   keeps the referenced object in the low 4GB.  */
19012 if (ix86_cmodel == CM_SMALL
19013 || (ix86_cmodel == CM_MEDIUM && code))
19014 return DW_EH_PE_udata4;
19015 return DW_EH_PE_absptr;
19018 #include "gt-i386.h"