/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit used when the target configuration does not
   provide one; -1 means "no limit".  The #endif closing this guard was
   lost in transit and is restored here.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DI map to slots 0-3; any other mode falls through to the
   "other" slot 4.  The final ": 4)" alternative was lost in transit;
   without it the conditional chain ends in a dangling "? 3" and the
   macro does not parse.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a cost measured in code bytes on the
   same scale that COSTS_N_INSNS uses for instruction counts.  */
#define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop-algorithm descriptor: always fall back to a
   libcall, used for table slots that do not apply to a given CPU.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of l1 cache */
116 0, /* size of l2 cache */
117 0, /* size of prefetch block */
118 0, /* number of parallel prefetches */
120 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
121 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
122 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
123 COSTS_N_BYTES (2), /* cost of FABS instruction. */
124 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
125 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
132 /* Processor costs (relative to an add) */
134 struct processor_costs i386_cost = { /* 386 specific costs */
135 COSTS_N_INSNS (1), /* cost of an add instruction */
136 COSTS_N_INSNS (1), /* cost of a lea instruction */
137 COSTS_N_INSNS (3), /* variable shift costs */
138 COSTS_N_INSNS (2), /* constant shift costs */
139 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
140 COSTS_N_INSNS (6), /* HI */
141 COSTS_N_INSNS (6), /* SI */
142 COSTS_N_INSNS (6), /* DI */
143 COSTS_N_INSNS (6)}, /* other */
144 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
145 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
146 COSTS_N_INSNS (23), /* HI */
147 COSTS_N_INSNS (23), /* SI */
148 COSTS_N_INSNS (23), /* DI */
149 COSTS_N_INSNS (23)}, /* other */
150 COSTS_N_INSNS (3), /* cost of movsx */
151 COSTS_N_INSNS (2), /* cost of movzx */
152 15, /* "large" insn */
154 4, /* cost for loading QImode using movzbl */
155 {2, 4, 2}, /* cost of loading integer registers
156 in QImode, HImode and SImode.
157 Relative to reg-reg move (2). */
158 {2, 4, 2}, /* cost of storing integer registers */
159 2, /* cost of reg,reg fld/fst */
160 {8, 8, 8}, /* cost of loading fp registers
161 in SFmode, DFmode and XFmode */
162 {8, 8, 8}, /* cost of storing fp registers
163 in SFmode, DFmode and XFmode */
164 2, /* cost of moving MMX register */
165 {4, 8}, /* cost of loading MMX registers
166 in SImode and DImode */
167 {4, 8}, /* cost of storing MMX registers
168 in SImode and DImode */
169 2, /* cost of moving SSE register */
170 {4, 8, 16}, /* cost of loading SSE registers
171 in SImode, DImode and TImode */
172 {4, 8, 16}, /* cost of storing SSE registers
173 in SImode, DImode and TImode */
174 3, /* MMX or SSE register to integer */
175 0, /* size of l1 cache */
176 0, /* size of l2 cache */
177 0, /* size of prefetch block */
178 0, /* number of parallel prefetches */
180 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
181 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
182 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
183 COSTS_N_INSNS (22), /* cost of FABS instruction. */
184 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
185 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
186 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
187 DUMMY_STRINGOP_ALGS},
188 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
189 DUMMY_STRINGOP_ALGS},
193 struct processor_costs i486_cost = { /* 486 specific costs */
194 COSTS_N_INSNS (1), /* cost of an add instruction */
195 COSTS_N_INSNS (1), /* cost of a lea instruction */
196 COSTS_N_INSNS (3), /* variable shift costs */
197 COSTS_N_INSNS (2), /* constant shift costs */
198 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
199 COSTS_N_INSNS (12), /* HI */
200 COSTS_N_INSNS (12), /* SI */
201 COSTS_N_INSNS (12), /* DI */
202 COSTS_N_INSNS (12)}, /* other */
203 1, /* cost of multiply per each bit set */
204 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
205 COSTS_N_INSNS (40), /* HI */
206 COSTS_N_INSNS (40), /* SI */
207 COSTS_N_INSNS (40), /* DI */
208 COSTS_N_INSNS (40)}, /* other */
209 COSTS_N_INSNS (3), /* cost of movsx */
210 COSTS_N_INSNS (2), /* cost of movzx */
211 15, /* "large" insn */
213 4, /* cost for loading QImode using movzbl */
214 {2, 4, 2}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 4, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {8, 8, 8}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {8, 8, 8}, /* cost of storing fp registers
222 in SFmode, DFmode and XFmode */
223 2, /* cost of moving MMX register */
224 {4, 8}, /* cost of loading MMX registers
225 in SImode and DImode */
226 {4, 8}, /* cost of storing MMX registers
227 in SImode and DImode */
228 2, /* cost of moving SSE register */
229 {4, 8, 16}, /* cost of loading SSE registers
230 in SImode, DImode and TImode */
231 {4, 8, 16}, /* cost of storing SSE registers
232 in SImode, DImode and TImode */
233 3, /* MMX or SSE register to integer */
234 4, /* size of l1 cache. 486 has 8kB cache
235 shared for code and data, so 4kB is
236 not really precise. */
237 4, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
241 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (3), /* cost of FABS instruction. */
245 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
247 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
248 DUMMY_STRINGOP_ALGS},
249 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
254 struct processor_costs pentium_cost = {
255 COSTS_N_INSNS (1), /* cost of an add instruction */
256 COSTS_N_INSNS (1), /* cost of a lea instruction */
257 COSTS_N_INSNS (4), /* variable shift costs */
258 COSTS_N_INSNS (1), /* constant shift costs */
259 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
260 COSTS_N_INSNS (11), /* HI */
261 COSTS_N_INSNS (11), /* SI */
262 COSTS_N_INSNS (11), /* DI */
263 COSTS_N_INSNS (11)}, /* other */
264 0, /* cost of multiply per each bit set */
265 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
266 COSTS_N_INSNS (25), /* HI */
267 COSTS_N_INSNS (25), /* SI */
268 COSTS_N_INSNS (25), /* DI */
269 COSTS_N_INSNS (25)}, /* other */
270 COSTS_N_INSNS (3), /* cost of movsx */
271 COSTS_N_INSNS (2), /* cost of movzx */
272 8, /* "large" insn */
274 6, /* cost for loading QImode using movzbl */
275 {2, 4, 2}, /* cost of loading integer registers
276 in QImode, HImode and SImode.
277 Relative to reg-reg move (2). */
278 {2, 4, 2}, /* cost of storing integer registers */
279 2, /* cost of reg,reg fld/fst */
280 {2, 2, 6}, /* cost of loading fp registers
281 in SFmode, DFmode and XFmode */
282 {4, 4, 6}, /* cost of storing fp registers
283 in SFmode, DFmode and XFmode */
284 8, /* cost of moving MMX register */
285 {8, 8}, /* cost of loading MMX registers
286 in SImode and DImode */
287 {8, 8}, /* cost of storing MMX registers
288 in SImode and DImode */
289 2, /* cost of moving SSE register */
290 {4, 8, 16}, /* cost of loading SSE registers
291 in SImode, DImode and TImode */
292 {4, 8, 16}, /* cost of storing SSE registers
293 in SImode, DImode and TImode */
294 3, /* MMX or SSE register to integer */
295 8, /* size of l1 cache. */
296 8, /* size of l2 cache */
297 0, /* size of prefetch block */
298 0, /* number of parallel prefetches */
300 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
301 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
302 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
303 COSTS_N_INSNS (1), /* cost of FABS instruction. */
304 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
305 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
306 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
307 DUMMY_STRINGOP_ALGS},
308 {{libcall, {{-1, rep_prefix_4_byte}}},
313 struct processor_costs pentiumpro_cost = {
314 COSTS_N_INSNS (1), /* cost of an add instruction */
315 COSTS_N_INSNS (1), /* cost of a lea instruction */
316 COSTS_N_INSNS (1), /* variable shift costs */
317 COSTS_N_INSNS (1), /* constant shift costs */
318 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
319 COSTS_N_INSNS (4), /* HI */
320 COSTS_N_INSNS (4), /* SI */
321 COSTS_N_INSNS (4), /* DI */
322 COSTS_N_INSNS (4)}, /* other */
323 0, /* cost of multiply per each bit set */
324 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
325 COSTS_N_INSNS (17), /* HI */
326 COSTS_N_INSNS (17), /* SI */
327 COSTS_N_INSNS (17), /* DI */
328 COSTS_N_INSNS (17)}, /* other */
329 COSTS_N_INSNS (1), /* cost of movsx */
330 COSTS_N_INSNS (1), /* cost of movzx */
331 8, /* "large" insn */
333 2, /* cost for loading QImode using movzbl */
334 {4, 4, 4}, /* cost of loading integer registers
335 in QImode, HImode and SImode.
336 Relative to reg-reg move (2). */
337 {2, 2, 2}, /* cost of storing integer registers */
338 2, /* cost of reg,reg fld/fst */
339 {2, 2, 6}, /* cost of loading fp registers
340 in SFmode, DFmode and XFmode */
341 {4, 4, 6}, /* cost of storing fp registers
342 in SFmode, DFmode and XFmode */
343 2, /* cost of moving MMX register */
344 {2, 2}, /* cost of loading MMX registers
345 in SImode and DImode */
346 {2, 2}, /* cost of storing MMX registers
347 in SImode and DImode */
348 2, /* cost of moving SSE register */
349 {2, 2, 8}, /* cost of loading SSE registers
350 in SImode, DImode and TImode */
351 {2, 2, 8}, /* cost of storing SSE registers
352 in SImode, DImode and TImode */
353 3, /* MMX or SSE register to integer */
354 8, /* size of l1 cache. */
355 256, /* size of l2 cache */
356 32, /* size of prefetch block */
357 6, /* number of parallel prefetches */
359 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
360 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
361 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
362 COSTS_N_INSNS (2), /* cost of FABS instruction. */
363 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
364 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
365 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
366 the alignment). For small blocks inline loop is still a noticeable win, for bigger
367 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
368 more expensive startup time in CPU, but after 4K the difference is down in the noise.
370 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
371 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
372 DUMMY_STRINGOP_ALGS},
373 {{rep_prefix_4_byte, {{1024, unrolled_loop},
374 {8192, rep_prefix_4_byte}, {-1, libcall}}},
379 struct processor_costs geode_cost = {
380 COSTS_N_INSNS (1), /* cost of an add instruction */
381 COSTS_N_INSNS (1), /* cost of a lea instruction */
382 COSTS_N_INSNS (2), /* variable shift costs */
383 COSTS_N_INSNS (1), /* constant shift costs */
384 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
385 COSTS_N_INSNS (4), /* HI */
386 COSTS_N_INSNS (7), /* SI */
387 COSTS_N_INSNS (7), /* DI */
388 COSTS_N_INSNS (7)}, /* other */
389 0, /* cost of multiply per each bit set */
390 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
391 COSTS_N_INSNS (23), /* HI */
392 COSTS_N_INSNS (39), /* SI */
393 COSTS_N_INSNS (39), /* DI */
394 COSTS_N_INSNS (39)}, /* other */
395 COSTS_N_INSNS (1), /* cost of movsx */
396 COSTS_N_INSNS (1), /* cost of movzx */
397 8, /* "large" insn */
399 1, /* cost for loading QImode using movzbl */
400 {1, 1, 1}, /* cost of loading integer registers
401 in QImode, HImode and SImode.
402 Relative to reg-reg move (2). */
403 {1, 1, 1}, /* cost of storing integer registers */
404 1, /* cost of reg,reg fld/fst */
405 {1, 1, 1}, /* cost of loading fp registers
406 in SFmode, DFmode and XFmode */
407 {4, 6, 6}, /* cost of storing fp registers
408 in SFmode, DFmode and XFmode */
410 1, /* cost of moving MMX register */
411 {1, 1}, /* cost of loading MMX registers
412 in SImode and DImode */
413 {1, 1}, /* cost of storing MMX registers
414 in SImode and DImode */
415 1, /* cost of moving SSE register */
416 {1, 1, 1}, /* cost of loading SSE registers
417 in SImode, DImode and TImode */
418 {1, 1, 1}, /* cost of storing SSE registers
419 in SImode, DImode and TImode */
420 1, /* MMX or SSE register to integer */
421 64, /* size of l1 cache. */
422 128, /* size of l2 cache. */
423 32, /* size of prefetch block */
424 1, /* number of parallel prefetches */
426 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (1), /* cost of FABS instruction. */
430 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
432 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
433 DUMMY_STRINGOP_ALGS},
434 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
439 struct processor_costs k6_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (2), /* cost of a lea instruction */
442 COSTS_N_INSNS (1), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (3), /* HI */
446 COSTS_N_INSNS (3), /* SI */
447 COSTS_N_INSNS (3), /* DI */
448 COSTS_N_INSNS (3)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (18), /* HI */
452 COSTS_N_INSNS (18), /* SI */
453 COSTS_N_INSNS (18), /* DI */
454 COSTS_N_INSNS (18)}, /* other */
455 COSTS_N_INSNS (2), /* cost of movsx */
456 COSTS_N_INSNS (2), /* cost of movzx */
457 8, /* "large" insn */
459 3, /* cost for loading QImode using movzbl */
460 {4, 5, 4}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {2, 3, 2}, /* cost of storing integer registers */
464 4, /* cost of reg,reg fld/fst */
465 {6, 6, 6}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 4, 4}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
469 2, /* cost of moving MMX register */
470 {2, 2}, /* cost of loading MMX registers
471 in SImode and DImode */
472 {2, 2}, /* cost of storing MMX registers
473 in SImode and DImode */
474 2, /* cost of moving SSE register */
475 {2, 2, 8}, /* cost of loading SSE registers
476 in SImode, DImode and TImode */
477 {2, 2, 8}, /* cost of storing SSE registers
478 in SImode, DImode and TImode */
479 6, /* MMX or SSE register to integer */
480 32, /* size of l1 cache. */
481 32, /* size of l2 cache. Some models
482 have integrated l2 cache, but
483 optimizing for k6 is not important
484 enough to worry about that. */
485 32, /* size of prefetch block */
486 1, /* number of parallel prefetches */
488 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
501 struct processor_costs athlon_cost = {
502 COSTS_N_INSNS (1), /* cost of an add instruction */
503 COSTS_N_INSNS (2), /* cost of a lea instruction */
504 COSTS_N_INSNS (1), /* variable shift costs */
505 COSTS_N_INSNS (1), /* constant shift costs */
506 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
507 COSTS_N_INSNS (5), /* HI */
508 COSTS_N_INSNS (5), /* SI */
509 COSTS_N_INSNS (5), /* DI */
510 COSTS_N_INSNS (5)}, /* other */
511 0, /* cost of multiply per each bit set */
512 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
513 COSTS_N_INSNS (26), /* HI */
514 COSTS_N_INSNS (42), /* SI */
515 COSTS_N_INSNS (74), /* DI */
516 COSTS_N_INSNS (74)}, /* other */
517 COSTS_N_INSNS (1), /* cost of movsx */
518 COSTS_N_INSNS (1), /* cost of movzx */
519 8, /* "large" insn */
521 4, /* cost for loading QImode using movzbl */
522 {3, 4, 3}, /* cost of loading integer registers
523 in QImode, HImode and SImode.
524 Relative to reg-reg move (2). */
525 {3, 4, 3}, /* cost of storing integer registers */
526 4, /* cost of reg,reg fld/fst */
527 {4, 4, 12}, /* cost of loading fp registers
528 in SFmode, DFmode and XFmode */
529 {6, 6, 8}, /* cost of storing fp registers
530 in SFmode, DFmode and XFmode */
531 2, /* cost of moving MMX register */
532 {4, 4}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {4, 4}, /* cost of storing MMX registers
535 in SImode and DImode */
536 2, /* cost of moving SSE register */
537 {4, 4, 6}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {4, 4, 5}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 5, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 256, /* size of l2 cache. */
544 64, /* size of prefetch block */
545 6, /* number of parallel prefetches */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
553 /* For some reason, Athlon deals better with REP prefix (relative to loops)
554 compared to K8. Alignment becomes important after 8 bytes for memcpy and
555 128 bytes for memset. */
556 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
557 DUMMY_STRINGOP_ALGS},
558 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
563 struct processor_costs k8_cost = {
564 COSTS_N_INSNS (1), /* cost of an add instruction */
565 COSTS_N_INSNS (2), /* cost of a lea instruction */
566 COSTS_N_INSNS (1), /* variable shift costs */
567 COSTS_N_INSNS (1), /* constant shift costs */
568 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
569 COSTS_N_INSNS (4), /* HI */
570 COSTS_N_INSNS (3), /* SI */
571 COSTS_N_INSNS (4), /* DI */
572 COSTS_N_INSNS (5)}, /* other */
573 0, /* cost of multiply per each bit set */
574 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
575 COSTS_N_INSNS (26), /* HI */
576 COSTS_N_INSNS (42), /* SI */
577 COSTS_N_INSNS (74), /* DI */
578 COSTS_N_INSNS (74)}, /* other */
579 COSTS_N_INSNS (1), /* cost of movsx */
580 COSTS_N_INSNS (1), /* cost of movzx */
581 8, /* "large" insn */
583 4, /* cost for loading QImode using movzbl */
584 {3, 4, 3}, /* cost of loading integer registers
585 in QImode, HImode and SImode.
586 Relative to reg-reg move (2). */
587 {3, 4, 3}, /* cost of storing integer registers */
588 4, /* cost of reg,reg fld/fst */
589 {4, 4, 12}, /* cost of loading fp registers
590 in SFmode, DFmode and XFmode */
591 {6, 6, 8}, /* cost of storing fp registers
592 in SFmode, DFmode and XFmode */
593 2, /* cost of moving MMX register */
594 {3, 3}, /* cost of loading MMX registers
595 in SImode and DImode */
596 {4, 4}, /* cost of storing MMX registers
597 in SImode and DImode */
598 2, /* cost of moving SSE register */
599 {4, 3, 6}, /* cost of loading SSE registers
600 in SImode, DImode and TImode */
601 {4, 4, 5}, /* cost of storing SSE registers
602 in SImode, DImode and TImode */
603 5, /* MMX or SSE register to integer */
604 64, /* size of l1 cache. */
605 512, /* size of l2 cache. */
606 64, /* size of prefetch block */
607 /* New AMD processors never drop prefetches; if they cannot be performed
608 immediately, they are queued. We set number of simultaneous prefetches
609 to a large constant to reflect this (it probably is not a good idea not
610 to limit number of prefetches at all, as their execution also takes some
612 100, /* number of parallel prefetches */
614 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (2), /* cost of FABS instruction. */
618 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
620 /* K8 has optimized REP instruction for medium sized blocks, but for very small
621 blocks it is better to use loop. For large blocks, libcall can do
622 nontemporary accesses and beat inline considerably. */
623 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
624 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
625 {{libcall, {{8, loop}, {24, unrolled_loop},
626 {2048, rep_prefix_4_byte}, {-1, libcall}}},
627 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
630 struct processor_costs amdfam10_cost = {
631 COSTS_N_INSNS (1), /* cost of an add instruction */
632 COSTS_N_INSNS (2), /* cost of a lea instruction */
633 COSTS_N_INSNS (1), /* variable shift costs */
634 COSTS_N_INSNS (1), /* constant shift costs */
635 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
636 COSTS_N_INSNS (4), /* HI */
637 COSTS_N_INSNS (3), /* SI */
638 COSTS_N_INSNS (4), /* DI */
639 COSTS_N_INSNS (5)}, /* other */
640 0, /* cost of multiply per each bit set */
641 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
642 COSTS_N_INSNS (35), /* HI */
643 COSTS_N_INSNS (51), /* SI */
644 COSTS_N_INSNS (83), /* DI */
645 COSTS_N_INSNS (83)}, /* other */
646 COSTS_N_INSNS (1), /* cost of movsx */
647 COSTS_N_INSNS (1), /* cost of movzx */
648 8, /* "large" insn */
650 4, /* cost for loading QImode using movzbl */
651 {3, 4, 3}, /* cost of loading integer registers
652 in QImode, HImode and SImode.
653 Relative to reg-reg move (2). */
654 {3, 4, 3}, /* cost of storing integer registers */
655 4, /* cost of reg,reg fld/fst */
656 {4, 4, 12}, /* cost of loading fp registers
657 in SFmode, DFmode and XFmode */
658 {6, 6, 8}, /* cost of storing fp registers
659 in SFmode, DFmode and XFmode */
660 2, /* cost of moving MMX register */
661 {3, 3}, /* cost of loading MMX registers
662 in SImode and DImode */
663 {4, 4}, /* cost of storing MMX registers
664 in SImode and DImode */
665 2, /* cost of moving SSE register */
666 {4, 4, 3}, /* cost of loading SSE registers
667 in SImode, DImode and TImode */
668 {4, 4, 5}, /* cost of storing SSE registers
669 in SImode, DImode and TImode */
670 3, /* MMX or SSE register to integer */
672 MOVD reg64, xmmreg Double FSTORE 4
673 MOVD reg32, xmmreg Double FSTORE 4
675 MOVD reg64, xmmreg Double FADD 3
677 MOVD reg32, xmmreg Double FADD 3
679 64, /* size of l1 cache. */
680 512, /* size of l2 cache. */
681 64, /* size of prefetch block */
682 /* New AMD processors never drop prefetches; if they cannot be performed
683 immediately, they are queued. We set number of simultaneous prefetches
684 to a large constant to reflect this (it probably is not a good idea not
685 to limit number of prefetches at all, as their execution also takes some
687 100, /* number of parallel prefetches */
689 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
690 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
691 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
692 COSTS_N_INSNS (2), /* cost of FABS instruction. */
693 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
694 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
696 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
697 very small blocks it is better to use loop. For large blocks, libcall can
698 do nontemporary accesses and beat inline considerably. */
699 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
700 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
701 {{libcall, {{8, loop}, {24, unrolled_loop},
702 {2048, rep_prefix_4_byte}, {-1, libcall}}},
703 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
707 struct processor_costs pentium4_cost = {
708 COSTS_N_INSNS (1), /* cost of an add instruction */
709 COSTS_N_INSNS (3), /* cost of a lea instruction */
710 COSTS_N_INSNS (4), /* variable shift costs */
711 COSTS_N_INSNS (4), /* constant shift costs */
712 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
713 COSTS_N_INSNS (15), /* HI */
714 COSTS_N_INSNS (15), /* SI */
715 COSTS_N_INSNS (15), /* DI */
716 COSTS_N_INSNS (15)}, /* other */
717 0, /* cost of multiply per each bit set */
718 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
719 COSTS_N_INSNS (56), /* HI */
720 COSTS_N_INSNS (56), /* SI */
721 COSTS_N_INSNS (56), /* DI */
722 COSTS_N_INSNS (56)}, /* other */
723 COSTS_N_INSNS (1), /* cost of movsx */
724 COSTS_N_INSNS (1), /* cost of movzx */
725 16, /* "large" insn */
727 2, /* cost for loading QImode using movzbl */
728 {4, 5, 4}, /* cost of loading integer registers
729 in QImode, HImode and SImode.
730 Relative to reg-reg move (2). */
731 {2, 3, 2}, /* cost of storing integer registers */
732 2, /* cost of reg,reg fld/fst */
733 {2, 2, 6}, /* cost of loading fp registers
734 in SFmode, DFmode and XFmode */
735 {4, 4, 6}, /* cost of storing fp registers
736 in SFmode, DFmode and XFmode */
737 2, /* cost of moving MMX register */
738 {2, 2}, /* cost of loading MMX registers
739 in SImode and DImode */
740 {2, 2}, /* cost of storing MMX registers
741 in SImode and DImode */
742 12, /* cost of moving SSE register */
743 {12, 12, 12}, /* cost of loading SSE registers
744 in SImode, DImode and TImode */
745 {2, 2, 8}, /* cost of storing SSE registers
746 in SImode, DImode and TImode */
747 10, /* MMX or SSE register to integer */
748 8, /* size of l1 cache. */
749 256, /* size of l2 cache. */
750 64, /* size of prefetch block */
751 6, /* number of parallel prefetches */
753 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
754 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
755 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
756 COSTS_N_INSNS (2), /* cost of FABS instruction. */
757 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
758 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
759 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
760 DUMMY_STRINGOP_ALGS},
761 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
763 DUMMY_STRINGOP_ALGS},
767 struct processor_costs nocona_cost = {
768 COSTS_N_INSNS (1), /* cost of an add instruction */
769 COSTS_N_INSNS (1), /* cost of a lea instruction */
770 COSTS_N_INSNS (1), /* variable shift costs */
771 COSTS_N_INSNS (1), /* constant shift costs */
772 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
773 COSTS_N_INSNS (10), /* HI */
774 COSTS_N_INSNS (10), /* SI */
775 COSTS_N_INSNS (10), /* DI */
776 COSTS_N_INSNS (10)}, /* other */
777 0, /* cost of multiply per each bit set */
778 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
779 COSTS_N_INSNS (66), /* HI */
780 COSTS_N_INSNS (66), /* SI */
781 COSTS_N_INSNS (66), /* DI */
782 COSTS_N_INSNS (66)}, /* other */
783 COSTS_N_INSNS (1), /* cost of movsx */
784 COSTS_N_INSNS (1), /* cost of movzx */
785 16, /* "large" insn */
787 4, /* cost for loading QImode using movzbl */
788 {4, 4, 4}, /* cost of loading integer registers
789 in QImode, HImode and SImode.
790 Relative to reg-reg move (2). */
791 {4, 4, 4}, /* cost of storing integer registers */
792 3, /* cost of reg,reg fld/fst */
793 {12, 12, 12}, /* cost of loading fp registers
794 in SFmode, DFmode and XFmode */
795 {4, 4, 4}, /* cost of storing fp registers
796 in SFmode, DFmode and XFmode */
797 6, /* cost of moving MMX register */
798 {12, 12}, /* cost of loading MMX registers
799 in SImode and DImode */
800 {12, 12}, /* cost of storing MMX registers
801 in SImode and DImode */
802 6, /* cost of moving SSE register */
803 {12, 12, 12}, /* cost of loading SSE registers
804 in SImode, DImode and TImode */
805 {12, 12, 12}, /* cost of storing SSE registers
806 in SImode, DImode and TImode */
807 8, /* MMX or SSE register to integer */
808 8, /* size of l1 cache. */
809 1024, /* size of l2 cache. */
810 128, /* size of prefetch block */
811 8, /* number of parallel prefetches */
813 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
814 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
815 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
816 COSTS_N_INSNS (3), /* cost of FABS instruction. */
817 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
818 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
819 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
820 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
821 {100000, unrolled_loop}, {-1, libcall}}}},
822 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
824 {libcall, {{24, loop}, {64, unrolled_loop},
825 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
/* Processor cost table used when tuning for Intel Core 2 (-mtune=core2).
   Fields are positional and must match struct processor_costs in i386.h.
   NOTE(review): this listing has dropped lines -- the "Branch cost" entry
   (between the prefetch count and the FADD cost) and the closing "};" of
   the initializer are not visible here; verify against the complete file
   before editing.  */
829 struct processor_costs core2_cost = {
830 COSTS_N_INSNS (1), /* cost of an add instruction */
831 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
832 COSTS_N_INSNS (1), /* variable shift costs */
833 COSTS_N_INSNS (1), /* constant shift costs */
834 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
835 COSTS_N_INSNS (3), /* HI */
836 COSTS_N_INSNS (3), /* SI */
837 COSTS_N_INSNS (3), /* DI */
838 COSTS_N_INSNS (3)}, /* other */
839 0, /* cost of multiply per each bit set */
840 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
841 COSTS_N_INSNS (22), /* HI */
842 COSTS_N_INSNS (22), /* SI */
843 COSTS_N_INSNS (22), /* DI */
844 COSTS_N_INSNS (22)}, /* other */
845 COSTS_N_INSNS (1), /* cost of movsx */
846 COSTS_N_INSNS (1), /* cost of movzx */
847 8, /* "large" insn */
849 2, /* cost for loading QImode using movzbl */
850 {6, 6, 6}, /* cost of loading integer registers
851 in QImode, HImode and SImode.
852 Relative to reg-reg move (2). */
853 {4, 4, 4}, /* cost of storing integer registers */
854 2, /* cost of reg,reg fld/fst */
855 {6, 6, 6}, /* cost of loading fp registers
856 in SFmode, DFmode and XFmode */
857 {4, 4, 4}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode.
   NOTE(review): comment previously said
   "cost of loading integer registers";
   this slot is the fp-store cost in
   every other cost table in this file.  */
858 2, /* cost of moving MMX register */
859 {6, 6}, /* cost of loading MMX registers
860 in SImode and DImode */
861 {4, 4}, /* cost of storing MMX registers
862 in SImode and DImode */
863 2, /* cost of moving SSE register */
864 {6, 6, 6}, /* cost of loading SSE registers
865 in SImode, DImode and TImode */
866 {4, 4, 4}, /* cost of storing SSE registers
867 in SImode, DImode and TImode */
868 2, /* MMX or SSE register to integer */
869 32, /* size of l1 cache. */
870 2048, /* size of l2 cache. */
871 128, /* size of prefetch block */
872 8, /* number of parallel prefetches */
874 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
875 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
876 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
877 COSTS_N_INSNS (1), /* cost of FABS instruction. */
878 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
879 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy algorithm table: {32-bit variants}, {64-bit variants}.  */
880 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
881 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
882 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* memset algorithm table: {32-bit variants}, {64-bit variants}.  */
883 {{libcall, {{8, loop}, {15, unrolled_loop},
884 {2048, rep_prefix_4_byte}, {-1, libcall}}},
885 {libcall, {{24, loop}, {32, unrolled_loop},
886 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
889 /* Generic64 should produce code tuned for Nocona and K8. */
/* Cost table for -mtune=generic in 64-bit mode.
   NOTE(review): this listing has dropped lines -- the branch-cost value
   (after the sixtrack comment) and the closing "};" are not visible;
   verify against the complete file before editing.  */
891 struct processor_costs generic64_cost = {
892 COSTS_N_INSNS (1), /* cost of an add instruction */
893 /* On all chips taken into consideration lea is 2 cycles and more. With
894 this cost however our current implementation of synth_mult results in
895 use of unnecessary temporary registers causing regression on several
896 SPECfp benchmarks. */
897 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
898 COSTS_N_INSNS (1), /* variable shift costs */
899 COSTS_N_INSNS (1), /* constant shift costs */
900 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
901 COSTS_N_INSNS (4), /* HI */
902 COSTS_N_INSNS (3), /* SI */
903 COSTS_N_INSNS (4), /* DI */
904 COSTS_N_INSNS (2)}, /* other */
905 0, /* cost of multiply per each bit set */
906 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
907 COSTS_N_INSNS (26), /* HI */
908 COSTS_N_INSNS (42), /* SI */
909 COSTS_N_INSNS (74), /* DI */
910 COSTS_N_INSNS (74)}, /* other */
911 COSTS_N_INSNS (1), /* cost of movsx */
912 COSTS_N_INSNS (1), /* cost of movzx */
913 8, /* "large" insn */
915 4, /* cost for loading QImode using movzbl */
916 {4, 4, 4}, /* cost of loading integer registers
917 in QImode, HImode and SImode.
918 Relative to reg-reg move (2). */
919 {4, 4, 4}, /* cost of storing integer registers */
920 4, /* cost of reg,reg fld/fst */
921 {12, 12, 12}, /* cost of loading fp registers
922 in SFmode, DFmode and XFmode */
923 {6, 6, 8}, /* cost of storing fp registers
924 in SFmode, DFmode and XFmode */
925 2, /* cost of moving MMX register */
926 {8, 8}, /* cost of loading MMX registers
927 in SImode and DImode */
928 {8, 8}, /* cost of storing MMX registers
929 in SImode and DImode */
930 2, /* cost of moving SSE register */
931 {8, 8, 8}, /* cost of loading SSE registers
932 in SImode, DImode and TImode */
933 {8, 8, 8}, /* cost of storing SSE registers
934 in SImode, DImode and TImode */
935 5, /* MMX or SSE register to integer */
936 32, /* size of l1 cache. */
937 512, /* size of l2 cache. */
938 64, /* size of prefetch block */
939 6, /* number of parallel prefetches */
940 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
941 is increased to perhaps more appropriate value of 5. */
943 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
944 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
945 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
946 COSTS_N_INSNS (8), /* cost of FABS instruction. */
947 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
948 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy: no 32-bit table for generic64, only the 64-bit entry.  */
949 {DUMMY_STRINGOP_ALGS,
950 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* memset: likewise 64-bit only.  */
951 {DUMMY_STRINGOP_ALGS,
952 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
955 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Cost table for -mtune=generic in 32-bit mode.
   NOTE(review): the closing "};" of this initializer is not visible in
   this listing; verify against the complete file before editing.  */
957 struct processor_costs generic32_cost = {
958 COSTS_N_INSNS (1), /* cost of an add instruction */
959 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
960 COSTS_N_INSNS (1), /* variable shift costs */
961 COSTS_N_INSNS (1), /* constant shift costs */
962 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
963 COSTS_N_INSNS (4), /* HI */
964 COSTS_N_INSNS (3), /* SI */
965 COSTS_N_INSNS (4), /* DI */
966 COSTS_N_INSNS (2)}, /* other */
967 0, /* cost of multiply per each bit set */
968 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
969 COSTS_N_INSNS (26), /* HI */
970 COSTS_N_INSNS (42), /* SI */
971 COSTS_N_INSNS (74), /* DI */
972 COSTS_N_INSNS (74)}, /* other */
973 COSTS_N_INSNS (1), /* cost of movsx */
974 COSTS_N_INSNS (1), /* cost of movzx */
975 8, /* "large" insn */
977 4, /* cost for loading QImode using movzbl */
978 {4, 4, 4}, /* cost of loading integer registers
979 in QImode, HImode and SImode.
980 Relative to reg-reg move (2). */
981 {4, 4, 4}, /* cost of storing integer registers */
982 4, /* cost of reg,reg fld/fst */
983 {12, 12, 12}, /* cost of loading fp registers
984 in SFmode, DFmode and XFmode */
985 {6, 6, 8}, /* cost of storing fp registers
986 in SFmode, DFmode and XFmode */
987 2, /* cost of moving MMX register */
988 {8, 8}, /* cost of loading MMX registers
989 in SImode and DImode */
990 {8, 8}, /* cost of storing MMX registers
991 in SImode and DImode */
992 2, /* cost of moving SSE register */
993 {8, 8, 8}, /* cost of loading SSE registers
994 in SImode, DImode and TImode */
995 {8, 8, 8}, /* cost of storing SSE registers
996 in SImode, DImode and TImode */
997 5, /* MMX or SSE register to integer */
998 32, /* size of l1 cache. */
999 256, /* size of l2 cache. */
1000 64, /* size of prefetch block */
1001 6, /* number of parallel prefetches */
1002 3, /* Branch cost */
1003 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1004 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1005 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1006 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1007 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1008 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy: 32-bit entry only; 64-bit slot is the dummy libcall table.  */
1009 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1010 DUMMY_STRINGOP_ALGS},
/* memset: likewise 32-bit only.  */
1011 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1012 DUMMY_STRINGOP_ALGS},
/* Pointer to the cost table currently in effect; defaults to Pentium and
   is presumably reset by option handling once -mtune is known -- the code
   that reassigns it is not visible in this listing.  */
1015 const struct processor_costs *ix86_cost = &pentium_cost;
1017 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; the m_* masks below are OR-ed
   together to say which processors a given tuning/feature applies to.  */
1018 #define m_386 (1<<PROCESSOR_I386)
1019 #define m_486 (1<<PROCESSOR_I486)
1020 #define m_PENT (1<<PROCESSOR_PENTIUM)
1021 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1022 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1023 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1024 #define m_CORE2 (1<<PROCESSOR_CORE2)
1026 #define m_GEODE (1<<PROCESSOR_GEODE)
1027 #define m_K6 (1<<PROCESSOR_K6)
1028 #define m_K6_GEODE (m_K6 | m_GEODE)
1029 #define m_K8 (1<<PROCESSOR_K8)
1030 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1031 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1032 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1033 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1035 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1036 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1038 /* Generic instruction choice should be common subset of supported CPUs
1039 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1040 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1042 /* Feature tests against the various tunings. */
/* Indexed by the X86_TUNE_* enum; each entry is an OR of m_* processor
   masks naming the CPUs for which the tuning is enabled.
   NOTE(review): this listing is incomplete -- for several entries only
   the X86_TUNE_* comment survives and the mask value line was dropped
   (e.g. ZERO_EXTEND_WITH_AND, USE_BIT_TEST, USE_MOV0, USE_XCHGB,
   SPLIT_LONG_MOVES, READ_MODIFY_WRITE, READ_MODIFY, QIMODE_MATH,
   HIMODE_MATH, PROMOTE_QI_REGS, PROMOTE_HI_REGS, SHIFT1, and the trailing
   entries), and the closing "};" is not visible.  The table is strictly
   positional, so do not edit without the complete file.  */
1043 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1044 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1045 negatively, so enabling for Generic64 seems like good code size
1046 tradeoff. We can't enable it for 32bit generic because it does not
1047 work well with PPro base chips. */
1048 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1050 /* X86_TUNE_PUSH_MEMORY */
1051 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1052 | m_NOCONA | m_CORE2 | m_GENERIC,
1054 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1057 /* X86_TUNE_USE_BIT_TEST */
1060 /* X86_TUNE_UNROLL_STRLEN */
1061 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1063 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1064 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1066 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1067 on simulation result. But after P4 was made, no performance benefit
1068 was observed with branch hints. It also increases the code size.
1069 As a result, icc never generates branch hints. */
1072 /* X86_TUNE_DOUBLE_WITH_ADD */
1075 /* X86_TUNE_USE_SAHF */
1076 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1077 | m_NOCONA | m_CORE2 | m_GENERIC,
1079 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1080 partial dependencies. */
1081 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1082 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1084 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1085 register stalls on Generic32 compilation setting as well. However
1086 in current implementation the partial register stalls are not eliminated
1087 very well - they can be introduced via subregs synthesized by combine
1088 and can happen in caller/callee saving sequences. Because this option
1089 pays back little on PPro based chips and is in conflict with partial reg
1090 dependencies used by Athlon/P4 based chips, it is better to leave it off
1091 for generic32 for now. */
1094 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1095 m_CORE2 | m_GENERIC,
1097 /* X86_TUNE_USE_HIMODE_FIOP */
1098 m_386 | m_486 | m_K6_GEODE,
1100 /* X86_TUNE_USE_SIMODE_FIOP */
1101 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1103 /* X86_TUNE_USE_MOV0 */
1106 /* X86_TUNE_USE_CLTD */
1107 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1109 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1112 /* X86_TUNE_SPLIT_LONG_MOVES */
1115 /* X86_TUNE_READ_MODIFY_WRITE */
1118 /* X86_TUNE_READ_MODIFY */
1121 /* X86_TUNE_PROMOTE_QIMODE */
1122 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1123 | m_GENERIC /* | m_PENT4 ? */,
1125 /* X86_TUNE_FAST_PREFIX */
1126 ~(m_PENT | m_486 | m_386),
1128 /* X86_TUNE_SINGLE_STRINGOP */
1129 m_386 | m_PENT4 | m_NOCONA,
1131 /* X86_TUNE_QIMODE_MATH */
1134 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1135 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1136 might be considered for Generic32 if our scheme for avoiding partial
1137 stalls was more effective. */
1140 /* X86_TUNE_PROMOTE_QI_REGS */
1143 /* X86_TUNE_PROMOTE_HI_REGS */
1146 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1147 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1149 /* X86_TUNE_ADD_ESP_8 */
1150 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1151 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1153 /* X86_TUNE_SUB_ESP_4 */
1154 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1156 /* X86_TUNE_SUB_ESP_8 */
1157 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1158 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1160 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1161 for DFmode copies */
1162 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1163 | m_GENERIC | m_GEODE),
1165 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1166 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1168 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1169 conflict here in between PPro/Pentium4 based chips that thread 128bit
1170 SSE registers as single units versus K8 based chips that divide SSE
1171 registers to two 64bit halves. This knob promotes all store destinations
1172 to be 128bit to allow register renaming on 128bit SSE units, but usually
1173 results in one extra microop on 64bit SSE units. Experimental results
1174 shows that disabling this option on P4 brings over 20% SPECfp regression,
1175 while enabling it on K8 brings roughly 2.4% regression that can be partly
1176 masked by careful scheduling of moves. */
1177 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1179 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1182 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1183 are resolved on SSE register parts instead of whole registers, so we may
1184 maintain just lower part of scalar values in proper format leaving the
1185 upper part undefined. */
1188 /* X86_TUNE_SSE_TYPELESS_STORES */
1189 m_ATHLON_K8_AMDFAM10,
1191 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1192 m_PPRO | m_PENT4 | m_NOCONA,
1194 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1195 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1197 /* X86_TUNE_PROLOGUE_USING_MOVE */
1198 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1200 /* X86_TUNE_EPILOGUE_USING_MOVE */
1201 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1203 /* X86_TUNE_SHIFT1 */
1206 /* X86_TUNE_USE_FFREEP */
1207 m_ATHLON_K8_AMDFAM10,
1209 /* X86_TUNE_INTER_UNIT_MOVES */
1210 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1212 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1213 than 4 branch instructions in the 16 byte window. */
1214 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1216 /* X86_TUNE_SCHEDULE */
1217 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1219 /* X86_TUNE_USE_BT */
1220 m_ATHLON_K8_AMDFAM10,
1222 /* X86_TUNE_USE_INCDEC */
1223 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1225 /* X86_TUNE_PAD_RETURNS */
1226 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1228 /* X86_TUNE_EXT_80387_CONSTANTS */
1229 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1231 /* X86_TUNE_SHORTEN_X87_SSE */
1234 /* X86_TUNE_AVOID_VECTOR_DECODE */
1237 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1238 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1241 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1242 vector path on AMD machines. */
1243 m_K8 | m_GENERIC64 | m_AMDFAM10,
1245 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1247 m_K8 | m_GENERIC64 | m_AMDFAM10,
1249 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1253 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1254 but one byte longer. */
1257 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1258 operand that cannot be represented using a modRM byte. The XOR
1259 replacement is long decoded, so this split helps here as well. */
1263 /* Feature tests against the various architecture variations. */
/* Indexed by the X86_ARCH_* enum; same m_* mask convention as
   ix86_tune_features above.
   NOTE(review): the mask values for CMPXCHG, CMPXCHG8B, XADD and BSWAP
   and the closing "};" were dropped from this listing.  */
1264 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1265 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1266 ~(m_386 | m_486 | m_PENT | m_K6),
1268 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1271 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1274 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1277 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors for which outgoing arguments are accumulated in the
   prologue rather than pushed per call.  */
1281 static const unsigned int x86_accumulate_outgoing_args
1282 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Processors on which 80387 math is always "fancy" (see uses elsewhere
   in this file -- not visible in this listing).  */
1284 static const unsigned int x86_arch_always_fancy_math_387
1285 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1286 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm forced by command line; no_stringop means
   "pick automatically".  */
1288 static enum stringop_alg stringop_alg = no_stringop;
1290 /* In case the average insn count for single function invocation is
1291 lower than this constant, emit fast (but longer) prologue and
   epilogue code -- NOTE(review): the tail of this comment was dropped
   from the listing.  */
1293 #define FAST_PROLOGUE_INSN_COUNT 20
1295 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The *_REGISTER_NAMES macros come from i386.h (not visible here).  */
1296 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1297 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1298 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1300 /* Array of the smallest class containing reg number REGNO, indexed by
1301 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): the opening "{" after "=" and the closing "};" (plus,
   presumably, the trailing MMX/REX-integer group separators) were dropped
   from this listing; entries are positional -- verify against the full
   file before editing.  */
1303 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1305 /* ax, dx, cx, bx */
1306 AREG, DREG, CREG, BREG,
1307 /* si, di, bp, sp */
1308 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP stack registers st(0)..st(7).  */
1310 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1311 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1314 /* flags, fpsr, fpcr, frame */
1315 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers xmm0..  */
1317 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers mm0..  */
1320 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers r8..r15.  */
1323 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1324 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1325 /* SSE REX registers */
1326 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1330 /* The "default" register map used in 32bit mode. */
/* Maps GCC regno -> debugger (stabs/dbx) register number; -1 means no
   debugger number.  NOTE(review): the opening "{" and closing "};" were
   dropped from this listing.  */
1332 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1334 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1335 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1336 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1337 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1338 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1339 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1340 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* GCC register numbers used to pass integer arguments under the x86-64
   SysV calling convention (rdi, rsi, rdx, rcx, r8, r9).
   NOTE(review): opening "{"/closing "};" lines dropped from listing.  */
1343 static int const x86_64_int_parameter_registers[6] =
1345 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1346 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Integer argument registers for the Microsoft x64 ABI (rcx, rdx, r8, r9).  */
1349 static int const x86_64_ms_abi_int_parameter_registers[4] =
1351 2 /*RCX*/, 1 /*RDX*/,
1352 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Integer value-return registers for 64-bit code.  */
1355 static int const x86_64_int_return_registers[4] =
1357 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1360 /* The "default" register map used in 64bit mode. */
/* GCC regno -> DWARF/debugger register number for x86-64.
   NOTE(review): opening "{"/closing "};" lines dropped from listing.  */
1361 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1363 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1364 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1365 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1366 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1367 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1368 8,9,10,11,12,13,14,15, /* extended integer registers */
1369 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1372 /* Define the register numbers to be used in Dwarf debugging information.
1373 The SVR4 reference port C compiler uses the following register numbers
1374 in its Dwarf output code:
1375 0 for %eax (gcc regno = 0)
1376 1 for %ecx (gcc regno = 2)
1377 2 for %edx (gcc regno = 1)
1378 3 for %ebx (gcc regno = 3)
1379 4 for %esp (gcc regno = 7)
1380 5 for %ebp (gcc regno = 6)
1381 6 for %esi (gcc regno = 4)
1382 7 for %edi (gcc regno = 5)
1383 The following three DWARF register numbers are never generated by
1384 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1385 believes these numbers have these meanings.
1386 8 for %eip (no gcc equivalent)
1387 9 for %eflags (gcc regno = 17)
1388 10 for %trapno (no gcc equivalent)
1389 It is not at all clear how we should number the FP stack registers
1390 for the x86 architecture. If the version of SDB on x86/svr4 were
1391 a bit less brain dead with respect to floating-point then we would
1392 have a precedent to follow with respect to DWARF register numbers
1393 for x86 FP registers, but the SDB on x86/svr4 is so completely
1394 broken with respect to FP registers that it is hardly worth thinking
1395 of it as something to strive for compatibility with.
1396 The version of x86/svr4 SDB I have at the moment does (partially)
1397 seem to believe that DWARF register number 11 is associated with
1398 the x86 register %st(0), but that's about all. Higher DWARF
1399 register numbers don't seem to be associated with anything in
1400 particular, and even for DWARF regno 11, SDB only seems to under-
1401 stand that it should say that a variable lives in %st(0) (when
1402 asked via an `=' command) if we said it was in DWARF regno 11,
1403 but SDB still prints garbage when asked for the value of the
1404 variable in question (via a `/' command).
1405 (Also note that the labels SDB prints for various FP stack regs
1406 when doing an `x' command are all wrong.)
1407 Note that these problems generally don't affect the native SVR4
1408 C compiler because it doesn't allow the use of -O with -g and
1409 because when it is *not* optimizing, it allocates a memory
1410 location for each floating-point variable, and the memory
1411 location is what gets described in the DWARF AT_location
1412 attribute for the variable in question.
1413 Regardless of the severe mental illness of the x86/svr4 SDB, we
1414 do something sensible here and we use the following DWARF
1415 register numbers. Note that these are all stack-top-relative
1417 11 for %st(0) (gcc regno = 8)
1418 12 for %st(1) (gcc regno = 9)
1419 13 for %st(2) (gcc regno = 10)
1420 14 for %st(3) (gcc regno = 11)
1421 15 for %st(4) (gcc regno = 12)
1422 16 for %st(5) (gcc regno = 13)
1423 17 for %st(6) (gcc regno = 14)
1424 18 for %st(7) (gcc regno = 15)
/* GCC regno -> SVR4 DWARF register number (see the long SDB discussion
   above for the rationale behind the FP-stack numbering).
   NOTE(review): opening "{"/closing "};" lines dropped from listing.  */
1426 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1428 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1429 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1430 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1431 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1432 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1433 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1434 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1437 /* Test and compare insns in i386.md store the information needed to
1438 generate branch and scc insns here. */
1440 rtx ix86_compare_op0 = NULL_RTX;
1441 rtx ix86_compare_op1 = NULL_RTX;
1442 rtx ix86_compare_emitted = NULL_RTX;
1444 /* Size of the register save area. */
1445 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1447 /* Define the structure for the machine field in struct function. */
/* Linked list of stack-local slots, keyed by mode.
   NOTE(review): several struct members (and the braces) were dropped
   from this listing -- do not edit without the full file.  */
1449 struct stack_local_entry GTY(())
1451 unsigned short mode;
/* Next entry in the per-function list.  */
1454 struct stack_local_entry *next;
1457 /* Structure describing stack frame layout.
1458 Stack grows downward:
/* NOTE(review): the middle of this layout diagram and several struct
   members (including the struct's name line and braces) were dropped
   from this listing; consult the full file.  */
1464 saved frame pointer if frame_pointer_needed
1465 <- HARD_FRAME_POINTER
1470 [va_arg registers] (
1471 > to_allocate <- FRAME_POINTER
1481 HOST_WIDE_INT frame;
1483 int outgoing_arguments_size;
/* Bytes the prologue must subtract from the stack pointer.  */
1486 HOST_WIDE_INT to_allocate;
1487 /* The offsets relative to ARG_POINTER. */
1488 HOST_WIDE_INT frame_pointer_offset;
1489 HOST_WIDE_INT hard_frame_pointer_offset;
1490 HOST_WIDE_INT stack_pointer_offset;
1492 /* When save_regs_using_mov is set, emit prologue using
1493 move instead of push instructions. */
1494 bool save_regs_using_mov;
1497 /* Code model option. */
1498 enum cmodel ix86_cmodel;
/* Assembler syntax selected by -masm= (AT&T by default).  */
1500 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access sequence style; GNU dialect by default.  */
1502 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1504 /* Which unit we are generating floating point math for. */
1505 enum fpmath_unit ix86_fpmath;
1507 /* Which cpu are we scheduling for. */
1508 enum processor_type ix86_tune;
1510 /* Which instruction set architecture to use. */
1511 enum processor_type ix86_arch;
1513 /* true if sse prefetch instruction is not NOOP. */
1514 int x86_prefetch_sse;
1516 /* ix86_regparm_string as a number */
1517 static int ix86_regparm;
1519 /* -mstackrealign option */
1520 extern int ix86_force_align_arg_pointer;
/* Attribute-name string used to mark functions needing argument-pointer
   realignment.  */
1521 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1523 /* Preferred alignment for stack boundary in bits. */
1524 unsigned int ix86_preferred_stack_boundary;
1526 /* Values 1-5: see jump.c */
1527 int ix86_branch_cost;
1529 /* Variables which are this size or smaller are put in the data/bss
1530 or ldata/lbss sections. */
1532 int ix86_section_threshold = 65536;
1534 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1535 char internal_label_prefix[16];
/* Length of internal_label_prefix, cached to avoid repeated strlen.  */
1536 int internal_label_prefix_len;
1538 /* Fence to use after loop using movnt. */
/* NOTE(review): the variable declared under this comment was dropped
   from the listing.  */
1541 /* Register class used for passing given 64bit part of the argument.
1542 These represent classes as documented by the PS ABI, with the exception
1543 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1544 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1546 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1547 whenever possible (upper half does contain padding). */
/* NOTE(review): most enumerators (NO_CLASS, SSE*, X87*, MEMORY_CLASS...)
   and the braces were dropped from this listing.  */
1548 enum x86_64_reg_class
1551 X86_64_INTEGER_CLASS,
1552 X86_64_INTEGERSI_CLASS,
1559 X86_64_COMPLEX_X87_CLASS,
/* Human-readable names parallel to the enum above (debug output).  */
1562 static const char * const x86_64_reg_class_name[] =
1564 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1565 "sseup", "x87", "x87up", "cplx87", "no"
/* Maximum number of 8-byte classes an argument can span.  */
1568 #define MAX_CLASSES 4
1570 /* Table of constants used by fldpi, fldln2, etc.... */
1571 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily-initialized flag for the table above.  */
1572 static bool ext_80387_constants_init = 0;
/* Forward declarations for statics defined later in the file.
   NOTE(review): the ix86_expand_vector_init_one_nonzero prototype is cut
   off mid-parameter-list in this listing.  */
1575 static struct machine_function * ix86_init_machine_status (void);
1576 static rtx ix86_function_value (tree, tree, bool);
1577 static int ix86_function_regparm (tree, tree);
1578 static void ix86_compute_frame_layout (struct ix86_frame *);
1579 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1583 /* The svr4 ABI for the i386 says that records and unions are returned
   in memory -- NOTE(review): remainder of this comment dropped from the
   listing.  */
1585 #ifndef DEFAULT_PCC_STRUCT_RETURN
1586 #define DEFAULT_PCC_STRUCT_RETURN 1
1589 /* Bit flags that specify the ISA we are compiling for. */
1590 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1592 /* A mask of ix86_isa_flags that includes bit X if X
1593 was set or cleared on the command line. */
1594 static int ix86_isa_flags_explicit;
1596 /* Define a set of ISAs which aren't available for a given ISA. MMX
1597 and SSE ISAs are handled separately. */
/* Each *_UNSET macro names the ISAs that must also be disabled when the
   corresponding ISA is disabled; the chained definitions make each
   -mno-<isa> cascade to everything that depends on it.  */
1599 #define OPTION_MASK_ISA_MMX_UNSET \
1600 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1601 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1603 #define OPTION_MASK_ISA_SSE_UNSET \
1604 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1605 #define OPTION_MASK_ISA_SSE2_UNSET \
1606 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1607 #define OPTION_MASK_ISA_SSE3_UNSET \
1608 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1609 #define OPTION_MASK_ISA_SSSE3_UNSET \
1610 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1611 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1612 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1613 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1615 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1616 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1617 #define OPTION_MASK_ISA_SSE4 \
1618 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1619 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1621 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1623 /* Implement TARGET_HANDLE_OPTION. */
/* Handles -m<isa>/-mno-<isa> switches: each positive option sets the
   ISA bit plus its "explicit" bit; each negative option clears the bit
   and its dependent *_UNSET cascade.  NOTE(review): this listing is
   missing the function's return type line, opening/closing braces, the
   switch statement, every OPT_m* case label, the positive-option
   "ix86_isa_flags |= ..." lines for most ISAs, and the return
   statements -- only the flag-manipulation lines survive.  Do not edit
   without the complete file.  */
1626 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1631 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1634 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1635 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1640 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1643 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1644 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1652 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1655 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1656 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1661 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1664 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1665 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1670 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1673 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1674 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1679 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1682 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1683 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1688 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1691 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1692 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1697 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1700 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1701 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1706 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1707 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1711 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1712 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1716 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1719 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1720 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1729 /* Sometimes certain combinations of command options do not make
1730 sense on a particular target machine. You can define a macro
1731 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1732 defined, is executed once just after all the command options have
1735 Don't use this macro to turn on various extra optimizations for
1736 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1739 override_options (void)
1742 int ix86_tune_defaulted = 0;
1743 int ix86_arch_specified = 0;
1744 unsigned int ix86_arch_mask, ix86_tune_mask;
1746 /* Comes from final.c -- no real reason to change it. */
1747 #define MAX_CODE_ALIGN 16
1751 const struct processor_costs *cost; /* Processor costs */
1752 const int align_loop; /* Default alignments. */
1753 const int align_loop_max_skip;
1754 const int align_jump;
1755 const int align_jump_max_skip;
1756 const int align_func;
1758 const processor_target_table[PROCESSOR_max] =
1760 {&i386_cost, 4, 3, 4, 3, 4},
1761 {&i486_cost, 16, 15, 16, 15, 16},
1762 {&pentium_cost, 16, 7, 16, 7, 16},
1763 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1764 {&geode_cost, 0, 0, 0, 0, 0},
1765 {&k6_cost, 32, 7, 32, 7, 32},
1766 {&athlon_cost, 16, 7, 16, 7, 16},
1767 {&pentium4_cost, 0, 0, 0, 0, 0},
1768 {&k8_cost, 16, 7, 16, 7, 16},
1769 {&nocona_cost, 0, 0, 0, 0, 0},
1770 {&core2_cost, 16, 10, 16, 10, 16},
1771 {&generic32_cost, 16, 7, 16, 7, 16},
1772 {&generic64_cost, 16, 10, 16, 10, 16},
1773 {&amdfam10_cost, 32, 24, 32, 7, 32}
1776 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1783 PTA_PREFETCH_SSE = 1 << 4,
1785 PTA_3DNOW_A = 1 << 6,
1789 PTA_POPCNT = 1 << 10,
1791 PTA_SSE4A = 1 << 12,
1792 PTA_NO_SAHF = 1 << 13,
1793 PTA_SSE4_1 = 1 << 14,
1794 PTA_SSE4_2 = 1 << 15
1799 const char *const name; /* processor name or nickname. */
1800 const enum processor_type processor;
1801 const unsigned /*enum pta_flags*/ flags;
1803 const processor_alias_table[] =
1805 {"i386", PROCESSOR_I386, 0},
1806 {"i486", PROCESSOR_I486, 0},
1807 {"i586", PROCESSOR_PENTIUM, 0},
1808 {"pentium", PROCESSOR_PENTIUM, 0},
1809 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1810 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1811 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1812 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1813 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1814 {"i686", PROCESSOR_PENTIUMPRO, 0},
1815 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1816 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1817 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1818 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1819 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1820 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1821 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1822 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1823 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1824 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1825 | PTA_CX16 | PTA_NO_SAHF)},
1826 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1827 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1830 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1831 |PTA_PREFETCH_SSE)},
1832 {"k6", PROCESSOR_K6, PTA_MMX},
1833 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1834 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1835 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1836 | PTA_PREFETCH_SSE)},
1837 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1838 | PTA_PREFETCH_SSE)},
1839 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1841 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1843 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1845 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1846 | PTA_MMX | PTA_SSE | PTA_SSE2
1848 {"k8", PROCESSOR_K8, (PTA_64BIT
1849 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1850 | PTA_SSE | PTA_SSE2
1852 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1853 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1854 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1856 {"opteron", PROCESSOR_K8, (PTA_64BIT
1857 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1858 | PTA_SSE | PTA_SSE2
1860 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1861 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1862 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1864 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1865 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1866 | PTA_SSE | PTA_SSE2
1868 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1869 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1870 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1872 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1873 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1874 | PTA_SSE | PTA_SSE2
1876 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1877 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1878 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1880 | PTA_CX16 | PTA_ABM)},
1881 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1882 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1883 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1885 | PTA_CX16 | PTA_ABM)},
1886 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1887 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1890 int const pta_size = ARRAY_SIZE (processor_alias_table);
1892 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1893 SUBTARGET_OVERRIDE_OPTIONS;
1896 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1897 SUBSUBTARGET_OVERRIDE_OPTIONS;
1900 /* -fPIC is the default for x86_64. */
1901 if (TARGET_MACHO && TARGET_64BIT)
1904 /* Set the default values for switches whose default depends on TARGET_64BIT
1905 in case they weren't overwritten by command line options. */
1908 /* Mach-O doesn't support omitting the frame pointer for now. */
1909 if (flag_omit_frame_pointer == 2)
1910 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1911 if (flag_asynchronous_unwind_tables == 2)
1912 flag_asynchronous_unwind_tables = 1;
1913 if (flag_pcc_struct_return == 2)
1914 flag_pcc_struct_return = 0;
1918 if (flag_omit_frame_pointer == 2)
1919 flag_omit_frame_pointer = 0;
1920 if (flag_asynchronous_unwind_tables == 2)
1921 flag_asynchronous_unwind_tables = 0;
1922 if (flag_pcc_struct_return == 2)
1923 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1926 /* Need to check -mtune=generic first. */
1927 if (ix86_tune_string)
1929 if (!strcmp (ix86_tune_string, "generic")
1930 || !strcmp (ix86_tune_string, "i686")
1931 /* As special support for cross compilers we read -mtune=native
1932 as -mtune=generic. With native compilers we won't see the
1933 -mtune=native, as it was changed by the driver. */
1934 || !strcmp (ix86_tune_string, "native"))
1937 ix86_tune_string = "generic64";
1939 ix86_tune_string = "generic32";
1941 else if (!strncmp (ix86_tune_string, "generic", 7))
1942 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1946 if (ix86_arch_string)
1947 ix86_tune_string = ix86_arch_string;
1948 if (!ix86_tune_string)
1950 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1951 ix86_tune_defaulted = 1;
1954 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1955 need to use a sensible tune option. */
1956 if (!strcmp (ix86_tune_string, "generic")
1957 || !strcmp (ix86_tune_string, "x86-64")
1958 || !strcmp (ix86_tune_string, "i686"))
1961 ix86_tune_string = "generic64";
1963 ix86_tune_string = "generic32";
1966 if (ix86_stringop_string)
1968 if (!strcmp (ix86_stringop_string, "rep_byte"))
1969 stringop_alg = rep_prefix_1_byte;
1970 else if (!strcmp (ix86_stringop_string, "libcall"))
1971 stringop_alg = libcall;
1972 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1973 stringop_alg = rep_prefix_4_byte;
1974 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1975 stringop_alg = rep_prefix_8_byte;
1976 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1977 stringop_alg = loop_1_byte;
1978 else if (!strcmp (ix86_stringop_string, "loop"))
1979 stringop_alg = loop;
1980 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1981 stringop_alg = unrolled_loop;
1983 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1985 if (!strcmp (ix86_tune_string, "x86-64"))
1986 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1987 "-mtune=generic instead as appropriate.");
1989 if (!ix86_arch_string)
1990 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1992 ix86_arch_specified = 1;
1994 if (!strcmp (ix86_arch_string, "generic"))
1995 error ("generic CPU can be used only for -mtune= switch");
1996 if (!strncmp (ix86_arch_string, "generic", 7))
1997 error ("bad value (%s) for -march= switch", ix86_arch_string);
1999 if (ix86_cmodel_string != 0)
2001 if (!strcmp (ix86_cmodel_string, "small"))
2002 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2003 else if (!strcmp (ix86_cmodel_string, "medium"))
2004 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2005 else if (!strcmp (ix86_cmodel_string, "large"))
2006 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2008 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2009 else if (!strcmp (ix86_cmodel_string, "32"))
2010 ix86_cmodel = CM_32;
2011 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2012 ix86_cmodel = CM_KERNEL;
2014 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2018 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2019 use of rip-relative addressing. This eliminates fixups that
2020 would otherwise be needed if this object is to be placed in a
2021 DLL, and is essentially just as efficient as direct addressing. */
2022 if (TARGET_64BIT_MS_ABI)
2023 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2024 else if (TARGET_64BIT)
2025 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2027 ix86_cmodel = CM_32;
2029 if (ix86_asm_string != 0)
2032 && !strcmp (ix86_asm_string, "intel"))
2033 ix86_asm_dialect = ASM_INTEL;
2034 else if (!strcmp (ix86_asm_string, "att"))
2035 ix86_asm_dialect = ASM_ATT;
2037 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2039 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2040 error ("code model %qs not supported in the %s bit mode",
2041 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2042 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2043 sorry ("%i-bit mode not compiled in",
2044 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2046 for (i = 0; i < pta_size; i++)
2047 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2049 ix86_arch = processor_alias_table[i].processor;
2050 /* Default cpu tuning to the architecture. */
2051 ix86_tune = ix86_arch;
2053 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2054 error ("CPU you selected does not support x86-64 "
2057 if (processor_alias_table[i].flags & PTA_MMX
2058 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2059 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2060 if (processor_alias_table[i].flags & PTA_3DNOW
2061 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2062 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2063 if (processor_alias_table[i].flags & PTA_3DNOW_A
2064 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2065 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2066 if (processor_alias_table[i].flags & PTA_SSE
2067 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2068 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2069 if (processor_alias_table[i].flags & PTA_SSE2
2070 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2071 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2072 if (processor_alias_table[i].flags & PTA_SSE3
2073 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2074 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2075 if (processor_alias_table[i].flags & PTA_SSSE3
2076 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2077 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2078 if (processor_alias_table[i].flags & PTA_SSE4_1
2079 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2080 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2081 if (processor_alias_table[i].flags & PTA_SSE4_2
2082 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2083 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2084 if (processor_alias_table[i].flags & PTA_SSE4A
2085 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2086 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2088 if (processor_alias_table[i].flags & PTA_ABM)
2090 if (processor_alias_table[i].flags & PTA_CX16)
2091 x86_cmpxchg16b = true;
2092 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2094 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2095 x86_prefetch_sse = true;
2096 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2103 error ("bad value (%s) for -march= switch", ix86_arch_string);
2105 ix86_arch_mask = 1u << ix86_arch;
2106 for (i = 0; i < X86_ARCH_LAST; ++i)
2107 ix86_arch_features[i] &= ix86_arch_mask;
2109 for (i = 0; i < pta_size; i++)
2110 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2112 ix86_tune = processor_alias_table[i].processor;
2113 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2115 if (ix86_tune_defaulted)
2117 ix86_tune_string = "x86-64";
2118 for (i = 0; i < pta_size; i++)
2119 if (! strcmp (ix86_tune_string,
2120 processor_alias_table[i].name))
2122 ix86_tune = processor_alias_table[i].processor;
2125 error ("CPU you selected does not support x86-64 "
2128 /* Intel CPUs have always interpreted SSE prefetch instructions as
2129 NOPs; so, we can enable SSE prefetch instructions even when
2130 -mtune (rather than -march) points us to a processor that has them.
2131 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2132 higher processors. */
2134 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2135 x86_prefetch_sse = true;
2139 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2141 ix86_tune_mask = 1u << ix86_tune;
2142 for (i = 0; i < X86_TUNE_LAST; ++i)
2143 ix86_tune_features[i] &= ix86_tune_mask;
2146 ix86_cost = &size_cost;
2148 ix86_cost = processor_target_table[ix86_tune].cost;
2150 /* Arrange to set up i386_stack_locals for all functions. */
2151 init_machine_status = ix86_init_machine_status;
2153 /* Validate -mregparm= value. */
2154 if (ix86_regparm_string)
2157 warning (0, "-mregparm is ignored in 64-bit mode");
2158 i = atoi (ix86_regparm_string);
2159 if (i < 0 || i > REGPARM_MAX)
2160 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2165 ix86_regparm = REGPARM_MAX;
2167 /* If the user has provided any of the -malign-* options,
2168 warn and use that value only if -falign-* is not set.
2169 Remove this code in GCC 3.2 or later. */
2170 if (ix86_align_loops_string)
2172 warning (0, "-malign-loops is obsolete, use -falign-loops");
2173 if (align_loops == 0)
2175 i = atoi (ix86_align_loops_string);
2176 if (i < 0 || i > MAX_CODE_ALIGN)
2177 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2179 align_loops = 1 << i;
2183 if (ix86_align_jumps_string)
2185 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2186 if (align_jumps == 0)
2188 i = atoi (ix86_align_jumps_string);
2189 if (i < 0 || i > MAX_CODE_ALIGN)
2190 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2192 align_jumps = 1 << i;
2196 if (ix86_align_funcs_string)
2198 warning (0, "-malign-functions is obsolete, use -falign-functions");
2199 if (align_functions == 0)
2201 i = atoi (ix86_align_funcs_string);
2202 if (i < 0 || i > MAX_CODE_ALIGN)
2203 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2205 align_functions = 1 << i;
2209 /* Default align_* from the processor table. */
2210 if (align_loops == 0)
2212 align_loops = processor_target_table[ix86_tune].align_loop;
2213 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2215 if (align_jumps == 0)
2217 align_jumps = processor_target_table[ix86_tune].align_jump;
2218 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2220 if (align_functions == 0)
2222 align_functions = processor_target_table[ix86_tune].align_func;
2225 /* Validate -mbranch-cost= value, or provide default. */
2226 ix86_branch_cost = ix86_cost->branch_cost;
2227 if (ix86_branch_cost_string)
2229 i = atoi (ix86_branch_cost_string);
2231 error ("-mbranch-cost=%d is not between 0 and 5", i);
2233 ix86_branch_cost = i;
2235 if (ix86_section_threshold_string)
2237 i = atoi (ix86_section_threshold_string);
2239 error ("-mlarge-data-threshold=%d is negative", i);
2241 ix86_section_threshold = i;
2244 if (ix86_tls_dialect_string)
2246 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2247 ix86_tls_dialect = TLS_DIALECT_GNU;
2248 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2249 ix86_tls_dialect = TLS_DIALECT_GNU2;
2250 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2251 ix86_tls_dialect = TLS_DIALECT_SUN;
2253 error ("bad value (%s) for -mtls-dialect= switch",
2254 ix86_tls_dialect_string);
2257 if (ix87_precision_string)
2259 i = atoi (ix87_precision_string);
2260 if (i != 32 && i != 64 && i != 80)
2261 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2266 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2268 /* Enable by default the SSE and MMX builtins. Do allow the user to
2269 explicitly disable any of these. In particular, disabling SSE and
2270 MMX for kernel code is extremely useful. */
2271 if (!ix86_arch_specified)
2273 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2274 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2277 warning (0, "-mrtd is ignored in 64bit mode");
2281 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2283 if (!ix86_arch_specified)
2285 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2287 /* i386 ABI does not specify red zone. It still makes sense to use it
2288 when programmer takes care to stack from being destroyed. */
2289 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2290 target_flags |= MASK_NO_RED_ZONE;
2293 /* Keep nonleaf frame pointers. */
2294 if (flag_omit_frame_pointer)
2295 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2296 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2297 flag_omit_frame_pointer = 1;
2299 /* If we're doing fast math, we don't care about comparison order
2300 wrt NaNs. This lets us use a shorter comparison sequence. */
2301 if (flag_finite_math_only)
2302 target_flags &= ~MASK_IEEE_FP;
2304 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2305 since the insns won't need emulation. */
2306 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2307 target_flags &= ~MASK_NO_FANCY_MATH_387;
2309 /* Likewise, if the target doesn't have a 387, or we've specified
2310 software floating point, don't use 387 inline intrinsics. */
2312 target_flags |= MASK_NO_FANCY_MATH_387;
2314 /* Turn on SSE4.1 builtins for -msse4.2. */
2316 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2318 /* Turn on SSSE3 builtins for -msse4.1. */
2320 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2322 /* Turn on SSE3 builtins for -mssse3. */
2324 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2326 /* Turn on SSE3 builtins for -msse4a. */
2328 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2330 /* Turn on SSE2 builtins for -msse3. */
2332 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2334 /* Turn on SSE builtins for -msse2. */
2336 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2338 /* Turn on MMX builtins for -msse. */
2341 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2342 x86_prefetch_sse = true;
2345 /* Turn on MMX builtins for 3Dnow. */
2347 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2349 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2350 if (TARGET_SSE4_2 || TARGET_ABM)
2353 /* Validate -mpreferred-stack-boundary= value, or provide default.
2354 The default of 128 bits is for Pentium III's SSE __m128. We can't
2355 change it because of optimize_size. Otherwise, we can't mix object
2356 files compiled with -Os and -On. */
2357 ix86_preferred_stack_boundary = 128;
2358 if (ix86_preferred_stack_boundary_string)
2360 i = atoi (ix86_preferred_stack_boundary_string);
2361 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2362 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2363 TARGET_64BIT ? 4 : 2);
2365 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2368 /* Accept -msseregparm only if at least SSE support is enabled. */
2369 if (TARGET_SSEREGPARM
2371 error ("-msseregparm used without SSE enabled");
2373 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2374 if (ix86_fpmath_string != 0)
2376 if (! strcmp (ix86_fpmath_string, "387"))
2377 ix86_fpmath = FPMATH_387;
2378 else if (! strcmp (ix86_fpmath_string, "sse"))
2382 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2383 ix86_fpmath = FPMATH_387;
2386 ix86_fpmath = FPMATH_SSE;
2388 else if (! strcmp (ix86_fpmath_string, "387,sse")
2389 || ! strcmp (ix86_fpmath_string, "sse,387"))
2393 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2394 ix86_fpmath = FPMATH_387;
2396 else if (!TARGET_80387)
2398 warning (0, "387 instruction set disabled, using SSE arithmetics");
2399 ix86_fpmath = FPMATH_SSE;
2402 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2405 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2408 /* If the i387 is disabled, then do not return values in it. */
2410 target_flags &= ~MASK_FLOAT_RETURNS;
2412 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2413 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2415 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2417 /* ??? Unwind info is not correct around the CFG unless either a frame
2418 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2419 unwind info generation to be aware of the CFG and propagating states
2421 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2422 || flag_exceptions || flag_non_call_exceptions)
2423 && flag_omit_frame_pointer
2424 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2426 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2427 warning (0, "unwind tables currently require either a frame pointer "
2428 "or -maccumulate-outgoing-args for correctness");
2429 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2432 /* For sane SSE instruction set generation we need fcomi instruction.
2433 It is safe to enable all CMOVE instructions. */
2437 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2440 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2441 p = strchr (internal_label_prefix, 'X');
2442 internal_label_prefix_len = p - internal_label_prefix;
2446 /* When scheduling description is not available, disable scheduler pass
2447 so it won't slow down the compilation and make x87 code slower. */
2448 if (!TARGET_SCHEDULE)
2449 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2451 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2452 set_param_value ("simultaneous-prefetches",
2453 ix86_cost->simultaneous_prefetches);
2454 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2455 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2456 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2457 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2458 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2459 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2462 /* Return true if this goes in large data/bss. */
/* EXP is a decl (or similar tree); decide whether it belongs in the
   .ldata/.lbss "large" sections of the medium code models.
   NOTE(review): the actual return statements are elided from this
   listing; only the tests are visible.  */
2465 ix86_in_large_data_p (tree exp)
/* Only the medium code models split off large data at all.  */
2467 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2470 /* Functions are never large data. */
2471 if (TREE_CODE (exp) == FUNCTION_DECL)
/* Honor an explicit placement into .ldata/.lbss via a section attribute.  */
2474 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2476 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2477 if (strcmp (section, ".ldata") == 0
2478 || strcmp (section, ".lbss") == 0)
/* Otherwise decide by size against -mlarge-data-threshold.  */
2484 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2486 /* If this is an incomplete type with size 0, then we can't put it
2487 in data because it might be too big when completed. */
2488 if (!size || size > ix86_section_threshold)
2495 /* Switch to the appropriate section for output of DECL.
2496 DECL is either a `VAR_DECL' node or a constant of some sort.
2497 RELOC indicates whether forming the initial value of DECL requires
2498 link-time relocations. */
2500 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2504 x86_64_elf_select_section (tree decl, int reloc,
2505 unsigned HOST_WIDE_INT align)
/* In the medium code models, large objects live in a parallel family
   of ".l*" sections; map the decl's section category onto the
   matching large-section name.  (Several case labels and break
   statements are elided from this listing.)  */
2507 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2508 && ix86_in_large_data_p (decl))
2510 const char *sname = NULL;
2511 unsigned int flags = SECTION_WRITE;
2512 switch (categorize_decl_for_section (decl, reloc))
2517 case SECCAT_DATA_REL:
2518 sname = ".ldata.rel";
2520 case SECCAT_DATA_REL_LOCAL:
2521 sname = ".ldata.rel.local";
2523 case SECCAT_DATA_REL_RO:
2524 sname = ".ldata.rel.ro";
2526 case SECCAT_DATA_REL_RO_LOCAL:
2527 sname = ".ldata.rel.ro.local";
/* BSS-like categories get the SECTION_BSS flag in addition.  */
2531 flags |= SECTION_BSS;
2534 case SECCAT_RODATA_MERGE_STR:
2535 case SECCAT_RODATA_MERGE_STR_INIT:
2536 case SECCAT_RODATA_MERGE_CONST:
2540 case SECCAT_SRODATA:
2547 /* We don't split these for medium model. Place them into
2548 default sections and hope for best. */
2553 /* We might get called with string constants, but get_named_section
2554 doesn't like them as they are not DECLs. Also, we need to set
2555 flags in that case. */
2557 return get_section (sname, flags, NULL);
2558 return get_named_section (decl, sname, reloc);
/* Not large data: defer to the generic ELF selection logic.  */
2561 return default_elf_select_section (decl, reloc, align);
2564 /* Build up a unique section name, expressed as a
2565 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2566 RELOC indicates whether the initial value of EXP requires
2567 link-time relocations. */
2569 static void ATTRIBUTE_UNUSED
2570 x86_64_elf_unique_section (tree decl, int reloc)
/* Large-model objects get a ".l"-prefixed (or .gnu.linkonce.l*)
   per-decl section name; everything else falls through to
   default_unique_section.  Some case labels/breaks are elided from
   this listing.  */
2572 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2573 && ix86_in_large_data_p (decl))
2575 const char *prefix = NULL;
2576 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2577 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2579 switch (categorize_decl_for_section (decl, reloc))
2582 case SECCAT_DATA_REL:
2583 case SECCAT_DATA_REL_LOCAL:
2584 case SECCAT_DATA_REL_RO:
2585 case SECCAT_DATA_REL_RO_LOCAL:
2586 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2589 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2592 case SECCAT_RODATA_MERGE_STR:
2593 case SECCAT_RODATA_MERGE_STR_INIT:
2594 case SECCAT_RODATA_MERGE_CONST:
2595 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2597 case SECCAT_SRODATA:
2604 /* We don't split these for medium model. Place them into
2605 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer
   and install it as the decl's section name.  */
2613 plen = strlen (prefix);
2615 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2616 name = targetm.strip_name_encoding (name);
2617 nlen = strlen (name);
2619 string = (char *) alloca (nlen + plen + 1);
2620 memcpy (string, prefix, plen);
2621 memcpy (string + plen, name, nlen + 1);
2623 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2627 default_unique_section (decl, reloc);
2630 #ifdef COMMON_ASM_OP
2631 /* This says how to output assembler code to declare an
2632 uninitialized external linkage data object.
2634 For medium model x86-64 we need to use .largecomm opcode for
/* Emit a ".largecomm" (for medium-model objects above the
   -mlarge-data-threshold) or the regular COMMON_ASM_OP directive,
   followed by "name,size,alignment-in-bytes".  */
2637 x86_elf_aligned_common (FILE *file,
2638 const char *name, unsigned HOST_WIDE_INT size,
2641 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2642 && size > (unsigned int)ix86_section_threshold)
2643 fprintf (file, ".largecomm\t");
2645 fprintf (file, "%s", COMMON_ASM_OP);
2646 assemble_name (file, name);
/* ALIGN arrives in bits; the directive wants bytes.  */
2647 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2648 size, align / BITS_PER_UNIT);
2652 /* Utility function for targets to use in implementing
2653 ASM_OUTPUT_ALIGNED_BSS. */
/* Emit NAME into .lbss (medium model, large object) or the normal
   bss section, aligned to ALIGN bits, reserving SIZE bytes.  */
2656 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2657 const char *name, unsigned HOST_WIDE_INT size,
2660 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2661 && size > (unsigned int)ix86_section_threshold)
2662 switch_to_section (get_named_section (decl, ".lbss", 0));
2664 switch_to_section (bss_section);
2665 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2666 #ifdef ASM_DECLARE_OBJECT_NAME
2667 last_assemble_variable_decl = decl;
2668 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2670 /* Standard thing is just output label for the object. */
2671 ASM_OUTPUT_LABEL (file, name);
2672 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to real storage.  */
2673 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Per-optimization-level option defaults (OPTIMIZATION_OPTIONS hook).
   Runs before override_options; TARGET_64BIT is not yet known here.  */
2677 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2679 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2680 make the problem with not enough registers even worse. */
2681 #ifdef INSN_SCHEDULING
2683 flag_schedule_insns = 0;
2687 /* The Darwin libraries never set errno, so we might as well
2688 avoid calling them when that's the only reason we would. */
2689 flag_errno_math = 0;
2691 /* The default values of these switches depend on the TARGET_64BIT
2692 that is not known at this moment. Mark these values with 2 and
2693 let user the to override these. In case there is no command line option
2694 specifying them, we will set the defaults in override_options. */
/* The sentinel 2 is replaced by the real default in override_options.  */
2696 flag_omit_frame_pointer = 2;
2697 flag_pcc_struct_return = 2;
2698 flag_asynchronous_unwind_tables = 2;
2699 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2700 SUBTARGET_OPTIMIZATION_OPTIONS;
2704 /* Decide whether we can make a sibling call to a function. DECL is the
2705 declaration of the function being targeted by the call and EXP is the
2706 CALL_EXPR representing the call. */
/* Returns false on any condition that would make a tail call unsafe;
   the "return false"/"return true" lines themselves are elided from
   this listing.  */
2709 ix86_function_ok_for_sibcall (tree decl, tree exp)
2714 /* If we are generating position-independent code, we cannot sibcall
2715 optimize any indirect call, or a direct call to a global function,
2716 as the PLT requires %ebx be live. */
2717 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Obtain the callee's function type via the CALL_EXPR's function
   pointer expression.  */
2724 func = TREE_TYPE (CALL_EXPR_FN (exp));
2725 if (POINTER_TYPE_P (func))
2726 func = TREE_TYPE (func);
2729 /* Check that the return value locations are the same. Like
2730 if we are returning floats on the 80387 register stack, we cannot
2731 make a sibcall from a function that doesn't return a float to a
2732 function that does or, conversely, from a function that does return
2733 a float to a function that doesn't; the necessary stack adjustment
2734 would not be executed. This is also the place we notice
2735 differences in the return value ABI. Note that it is ok for one
2736 of the functions to have void return type as long as the return
2737 value of the other is passed in a register. */
2738 a = ix86_function_value (TREE_TYPE (exp), func, false);
2739 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
/* x87-stack returns must match exactly; for other register returns a
   void caller result is also acceptable.  */
2741 if (STACK_REG_P (a) || STACK_REG_P (b))
2743 if (!rtx_equal_p (a, b))
2746 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2748 else if (!rtx_equal_p (a, b))
2751 /* If this call is indirect, we'll need to be able to use a call-clobbered
2752 register for the address of the target function. Make sure that all
2753 such registers are not used for passing parameters. */
2754 if (!decl && !TARGET_64BIT)
2758 /* We're looking at the CALL_EXPR, we need the type of the function. */
2759 type = CALL_EXPR_FN (exp); /* pointer expression */
2760 type = TREE_TYPE (type); /* pointer type */
2761 type = TREE_TYPE (type); /* function type */
/* With 3 regparm args, eax/edx/ecx are all taken and no call-clobbered
   register remains for the indirect target address.  */
2763 if (ix86_function_regparm (type, NULL) >= 3)
2765 /* ??? Need to count the actual number of registers to be used,
2766 not the possible number of registers. Fix later. */
2771 /* Dllimport'd functions are also called indirectly. */
2772 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2773 && decl && DECL_DLLIMPORT_P (decl)
2774 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2777 /* If we forced aligned the stack, then sibcalling would unalign the
2778 stack, which may break the called function. */
2779 if (cfun->machine->force_align_arg_pointer)
2782 /* Otherwise okay. That also includes certain types of indirect calls. */
2786 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2787 calling convention attributes;
2788 arguments as in struct attribute_spec.handler. */
/* Validates attribute placement and mutual compatibility; sets
   *no_add_attrs to suppress an invalid attribute.  Several return
   statements and closing braces are elided from this listing.  */
2791 ix86_handle_cconv_attribute (tree *node, tree name,
2793 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on (pointers to) function types.  */
2796 if (TREE_CODE (*node) != FUNCTION_TYPE
2797 && TREE_CODE (*node) != METHOD_TYPE
2798 && TREE_CODE (*node) != FIELD_DECL
2799 && TREE_CODE (*node) != TYPE_DECL)
2801 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2802 IDENTIFIER_POINTER (name));
2803 *no_add_attrs = true;
2807 /* Can combine regparm with all attributes but fastcall. */
2808 if (is_attribute_p ("regparm", name))
2812 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2814 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one integer argument in [0, REGPARM_MAX].  */
2817 cst = TREE_VALUE (args);
2818 if (TREE_CODE (cst) != INTEGER_CST)
2820 warning (OPT_Wattributes,
2821 "%qs attribute requires an integer constant argument",
2822 IDENTIFIER_POINTER (name));
2823 *no_add_attrs = true;
2825 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2827 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2828 IDENTIFIER_POINTER (name), REGPARM_MAX);
2829 *no_add_attrs = true;
/* Force-align-arg-pointer functions reserve one register, so they
   allow at most REGPARM_MAX-1 register parameters.  */
2833 && lookup_attribute (ix86_force_align_arg_pointer_string,
2834 TYPE_ATTRIBUTES (*node))
2835 && compare_tree_int (cst, REGPARM_MAX-1))
2837 error ("%s functions limited to %d register parameters",
2838 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
/* NOTE(review): the 64-bit branch header is elided here; in 64-bit
   mode these 32-bit conventions are ignored with a warning.  */
2846 /* Do not warn when emulating the MS ABI. */
2847 if (!TARGET_64BIT_MS_ABI)
2848 warning (OPT_Wattributes, "%qs attribute ignored",
2849 IDENTIFIER_POINTER (name));
2850 *no_add_attrs = true;
2854 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2855 if (is_attribute_p ("fastcall", name))
2857 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2859 error ("fastcall and cdecl attributes are not compatible");
2861 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2863 error ("fastcall and stdcall attributes are not compatible");
2865 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2867 error ("fastcall and regparm attributes are not compatible");
2871 /* Can combine stdcall with fastcall (redundant), regparm and
2873 else if (is_attribute_p ("stdcall", name))
2875 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2877 error ("stdcall and cdecl attributes are not compatible");
2879 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2881 error ("stdcall and fastcall attributes are not compatible");
2885 /* Can combine cdecl with regparm and sseregparm. */
2886 else if (is_attribute_p ("cdecl", name))
2888 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2890 error ("stdcall and cdecl attributes are not compatible");
2892 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2894 error ("fastcall and cdecl attributes are not compatible");
2898 /* Can combine sseregparm with all attributes. */
2903 /* Return 0 if the attributes for two types are incompatible, 1 if they
2904 are compatible, and 2 if they are nearly compatible (which causes a
2905 warning to be generated). */
2908 ix86_comp_type_attributes (tree type1, tree type2)
2910 /* Check for mismatch of non-default calling convention. */
2911 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function types carry calling-convention attributes.  */
2913 if (TREE_CODE (type1) != FUNCTION_TYPE)
2916 /* Check for mismatched fastcall/regparm types. */
/* The paired '!' operators normalize lookup_attribute results to
   booleans so only presence/absence is compared.  */
2917 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2918 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2919 || (ix86_function_regparm (type1, NULL)
2920 != ix86_function_regparm (type2, NULL)))
2923 /* Check for mismatched sseregparm types. */
2924 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2925 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2928 /* Check for mismatched return types (cdecl vs stdcall). */
2929 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2930 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2936 /* Return the regparm value for a function with the indicated TYPE and DECL.
2937 DECL may be NULL when calling function indirectly
2938 or considering a libcall. */
2941 ix86_function_regparm (tree type, tree decl)
2944 int regparm = ix86_regparm;
/* An explicit regparm attribute on the type wins over everything.  */
2949 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type))
2951 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2953 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2956 /* Use register calling convention for local functions when possible. */
2957 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2958 && flag_unit_at_a_time && !profile_flag)
2960 struct cgraph_local_info *i = cgraph_local_info (decl);
2963 int local_regparm, globals = 0, regno;
2966 /* Make sure no regparm register is taken by a
2967 global register variable. */
2968 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2969 if (global_regs[local_regparm])
2972 /* We can't use regparm(3) for nested functions as these use
2973 static chain pointer in third argument. */
2974 if (local_regparm == 3
2975 && (decl_function_context (decl)
2976 || ix86_force_align_arg_pointer)
2977 && !DECL_NO_STATIC_CHAIN (decl))
2980 /* If the function realigns its stackpointer, the prologue will
2981 clobber %ecx. If we've already generated code for the callee,
2982 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2983 scanning the attributes for the self-realigning property. */
2984 f = DECL_STRUCT_FUNCTION (decl);
2985 if (local_regparm == 3
2986 && (f ? !!f->machine->force_align_arg_pointer
2987 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2988 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2991 /* Each global register variable increases register pressure,
2992 so the more global reg vars there are, the smaller regparm
2993 optimization use, unless requested by the user explicitly. */
2994 for (regno = 0; regno < 6; regno++)
2995 if (global_regs[regno])
/* Usable regparm count = requested count minus global-reg clashes.  */
2998 = globals < local_regparm ? local_regparm - globals : 0;
3000 if (local_regparm > regparm)
3001 regparm = local_regparm;
3008 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3009 DFmode (2) arguments in SSE registers for a function with the
3010 indicated TYPE and DECL. DECL may be NULL when calling function
3011 indirectly or considering a libcall. Otherwise return 0. */
3014 ix86_function_sseregparm (tree type, tree decl)
/* 64-bit has its own SSE argument rules; this helper is 32-bit only.  */
3016 gcc_assert (!TARGET_64BIT);
3018 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3019 by the sseregparm attribute. */
3020 if (TARGET_SSEREGPARM
3021 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* NOTE(review): GCC diagnostics conventionally start lowercase
   ("calling %qD ..."); confirm against project diagnostic style.  */
3026 error ("Calling %qD with attribute sseregparm without "
3027 "SSE/SSE2 enabled", decl);
3029 error ("Calling %qT with attribute sseregparm without "
3030 "SSE/SSE2 enabled", type);
3037 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3038 (and DFmode for SSE2) arguments in SSE registers. */
3039 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3041 struct cgraph_local_info *i = cgraph_local_info (decl);
3043 return TARGET_SSE2 ? 2 : 1;
3049 /* Return true if EAX is live at the start of the function. Used by
3050 ix86_expand_prologue to determine if we need special help before
3051 calling allocate_stack_worker. */
3054 ix86_eax_live_at_start_p (void)
3056 /* Cheat. Don't bother working forward from ix86_function_regparm
3057 to the function type to whether an actual argument is located in
3058 eax. Instead just look at cfg info, which is still close enough
3059 to correct at this point. This gives false positives for broken
3060 functions that might use uninitialized data that happens to be
3061 allocated in eax, but who cares? */
/* Hard register 0 is %eax on IA-32.  */
3062 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3065 /* Return true if TYPE has a variable argument list. */
3068 type_has_variadic_args_p (tree type)
3070 tree n, t = TYPE_ARG_TYPES (type);
/* Walk to the final element of the argument-type chain.  */
3075 while ((n = TREE_CHAIN (t)) != NULL)
/* A prototyped, non-variadic list is terminated by void_type_node;
   anything else means the function takes "...".  */
3078 return TREE_VALUE (t) != void_type_node;
3081 /* Value is the number of bytes of arguments automatically
3082 popped when returning from a subroutine call.
3083 FUNDECL is the declaration node of the function (as a tree),
3084 FUNTYPE is the data type of the function (as a tree),
3085 or for a library call it is an identifier node for the subroutine name.
3086 SIZE is the number of bytes of arguments passed on the stack.
3088 On the 80386, the RTD insn may be used to pop them if the number
3089 of args is fixed, but if the number is variable then the caller
3090 must pop them all. RTD can't be used for library calls now
3091 because the library is compiled with the Unix compiler.
3092 Use of RTD is a selectable option, since it is incompatible with
3093 standard Unix calling sequences. If the option is not selected,
3094 the caller must always pop the args.
3096 The attribute stdcall is equivalent to RTD on a per module basis. */
3099 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3103 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real function decls, not libcall identifiers.  */
3107 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3109 /* Cdecl functions override -mrtd, and never pop the stack. */
3110 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3112 /* Stdcall and fastcall functions will pop the stack if not
3114 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3115 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* RTD can only pop a fixed count, so variadic functions are excluded.  */
3118 if (rtd && ! type_has_variadic_args_p (funtype))
3122 /* Lose any fake structure return argument if it is passed on the stack. */
3123 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3124 && !KEEP_AGGREGATE_RETURN_POINTER)
3126 int nregs = ix86_function_regparm (funtype, fundecl);
/* The callee pops the hidden aggregate-return pointer: one word.  */
3128 return GET_MODE_SIZE (Pmode);
3134 /* Argument support functions. */
3136 /* Return true when register may be used to pass function parameters. */
3138 ix86_function_arg_regno_p (int regno)
3141 const int *parm_regs;
3146 return (regno < REGPARM_MAX
3147 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3149 return (regno < REGPARM_MAX
3150 || (TARGET_MMX && MMX_REGNO_P (regno)
3151 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3152 || (TARGET_SSE && SSE_REGNO_P (regno)
3153 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3158 if (SSE_REGNO_P (regno) && TARGET_SSE)
3163 if (TARGET_SSE && SSE_REGNO_P (regno)
3164 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3168 /* RAX is used as hidden argument to va_arg functions. */
3169 if (!TARGET_64BIT_MS_ABI && regno == 0)
/* Remaining 64-bit cases: consult the ABI-specific integer parameter
   register table (MS vs. SysV).  */
3172 if (TARGET_64BIT_MS_ABI)
3173 parm_regs = x86_64_ms_abi_int_parameter_registers;
3175 parm_regs = x86_64_int_parameter_registers;
3176 for (i = 0; i < REGPARM_MAX; i++)
3177 if (regno == parm_regs[i])
3182 /* Return if we do not know how to pass TYPE solely in registers. */
3185 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
/* Defer first to the generic middle-end test (variable-sized types,
   padding constraints).  */
3187 if (must_pass_in_stack_var_size_or_pad (mode, type))
3190 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3191 The layout_type routine is crafty and tries to trick us into passing
3192 currently unsupported vector types on the stack by using TImode. */
3193 return (!TARGET_64BIT && mode == TImode
3194 && type && TREE_CODE (type) != VECTOR_TYPE);
3197 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3198 for a call to a function whose data type is FNTYPE.
3199 For a library call, FNTYPE is 0. */
3202 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3203 tree fntype, /* tree ptr for function decl */
3204 rtx libname, /* SYMBOL_REF of library name or 0 */
/* Start from a zeroed state; only the fields that differ from zero are
   set explicitly below.  */
3207 memset (cum, 0, sizeof (*cum));
3209 /* Set up the number of registers to use for passing arguments. */
3210 cum->nregs = ix86_regparm;
3212 cum->sse_nregs = SSE_REGPARM_MAX;
3214 cum->mmx_nregs = MMX_REGPARM_MAX;
3215 cum->warn_sse = true;
3216 cum->warn_mmx = true;
/* A function with no prototype (no TYPE_ARG_TYPES) must be treated as
   potentially variadic.  */
3217 cum->maybe_vaarg = (fntype
3218 ? (!TYPE_ARG_TYPES (fntype)
3219 || type_has_variadic_args_p (fntype))
3224 /* If there are variable arguments, then we won't pass anything
3225 in registers in 32-bit mode. */
3226 if (cum->maybe_vaarg)
3236 /* Use ecx and edx registers if function has fastcall attribute,
3237 else look for regparm information. */
3240 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3246 cum->nregs = ix86_function_regparm (fntype, fndecl);
3249 /* Set up the number of SSE registers used for passing SFmode
3250 and DFmode arguments. Warn for mismatching ABI. */
3251 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3255 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3256 But in the case of vector types, it is some vector mode.
3258 When we have only some of our vector isa extensions enabled, then there
3259 are some modes for which vector_mode_supported_p is false. For these
3260 modes, the generic vector support in gcc will choose some non-vector mode
3261 in order to implement the type. By computing the natural mode, we'll
3262 select the proper ABI location for the operand and not depend on whatever
3263 the middle-end decides to do with these vector types. */
3265 static enum machine_mode
3266 type_natural_mode (tree type)
3268 enum machine_mode mode = TYPE_MODE (type);
/* Only reconsider vector types the middle-end demoted to a scalar mode;
   sizes other than 8/16 bytes have no MMX/SSE mode anyway.  */
3270 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3272 HOST_WIDE_INT size = int_size_in_bytes (type);
3273 if ((size == 8 || size == 16)
3274 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3275 && TYPE_VECTOR_SUBPARTS (type) > 1)
3277 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3279 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3280 mode = MIN_MODE_VECTOR_FLOAT;
3282 mode = MIN_MODE_VECTOR_INT;
3284 /* Get the mode which has this inner mode and number of units. */
3285 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3286 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3287 && GET_MODE_INNER (mode) == innermode)
3297 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3298 this may not agree with the mode that the type system has chosen for the
3299 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3300 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3303 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3308 if (orig_mode != BLKmode)
3309 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   located at offset 0.  */
3312 tmp = gen_rtx_REG (mode, regno);
3313 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3314 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3320 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3321 of this code is to classify each 8bytes of incoming argument by the register
3322 class and assign registers accordingly. */
3324 /* Return the union class of CLASS1 and CLASS2.
3325 See the x86-64 PS ABI for details. */
/* Implements the psABI "merge" step used when two fields share an
   eightbyte; rules are checked in the ABI-specified order.  */
3327 static enum x86_64_reg_class
3328 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3330 /* Rule #1: If both classes are equal, this is the resulting class. */
3331 if (class1 == class2)
3334 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3336 if (class1 == X86_64_NO_CLASS)
3338 if (class2 == X86_64_NO_CLASS)
3341 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3342 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3343 return X86_64_MEMORY_CLASS;
3345 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI merged with SSESF stays INTEGERSI: both halves fit in the
   low 32 bits of an integer register.  */
3346 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3347 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3348 return X86_64_INTEGERSI_CLASS;
3349 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3350 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3351 return X86_64_INTEGER_CLASS;
3353 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3355 if (class1 == X86_64_X87_CLASS
3356 || class1 == X86_64_X87UP_CLASS
3357 || class1 == X86_64_COMPLEX_X87_CLASS
3358 || class2 == X86_64_X87_CLASS
3359 || class2 == X86_64_X87UP_CLASS
3360 || class2 == X86_64_COMPLEX_X87_CLASS)
3361 return X86_64_MEMORY_CLASS;
3363 /* Rule #6: Otherwise class SSE is used. */
3364 return X86_64_SSE_CLASS;
3367 /* Classify the argument of type TYPE and mode MODE.
3368 CLASSES will be filled by the register class used to pass each word
3369 of the operand. The number of words is returned. In case the parameter
3370 should be passed in memory, 0 is returned. As a special case for zero
3371 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3373 BIT_OFFSET is used internally for handling records and specifies offset
3374 of the offset in bits modulo 256 to avoid overflow cases.
3376 See the x86-64 PS ABI for details.
/* NOTE(review): heavily elided view — switch labels, braces and returns
   are missing between many lines below; comments only, code unchanged.  */
3380 classify_argument (enum machine_mode mode, tree type,
3381 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3383 HOST_WIDE_INT bytes =
3384 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3385 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3387 /* Variable sized entities are always passed/returned in memory. */
3391 if (mode != VOIDmode
3392 && targetm.calls.must_pass_in_stack (mode, type))
/* Aggregates: classify field-by-field and merge per eightbyte.  */
3395 if (type && AGGREGATE_TYPE_P (type))
3399 enum x86_64_reg_class subclasses[MAX_CLASSES];
3401 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3405 for (i = 0; i < words; i++)
3406 classes[i] = X86_64_NO_CLASS;
3408 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3409 signalize memory class, so handle it as special case. */
3412 classes[0] = X86_64_NO_CLASS;
3416 /* Classify each field of record and merge classes. */
3417 switch (TREE_CODE (type))
3420 /* And now merge the fields of structure. */
3421 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3423 if (TREE_CODE (field) == FIELD_DECL)
3427 if (TREE_TYPE (field) == error_mark_node)
3430 /* Bitfields are always classified as integer. Handle them
3431 early, since later code would consider them to be
3432 misaligned integers. */
3433 if (DECL_BIT_FIELD (field))
/* "/ 8 / 8" converts a bit position to an eightbyte index.  */
3435 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3436 i < ((int_bit_position (field) + (bit_offset % 64))
3437 + tree_low_cst (DECL_SIZE (field), 0)
3440 merge_classes (X86_64_INTEGER_CLASS,
3445 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3446 TREE_TYPE (field), subclasses,
3447 (int_bit_position (field)
3448 + bit_offset) % 256);
3451 for (i = 0; i < num; i++)
3454 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3456 merge_classes (subclasses[i], classes[i + pos]);
3464 /* Arrays are handled as small records. */
3467 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3468 TREE_TYPE (type), subclasses, bit_offset);
3472 /* The partial classes are now full classes. */
3473 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3474 subclasses[0] = X86_64_SSE_CLASS;
3475 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3476 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words of the array.  */
3478 for (i = 0; i < words; i++)
3479 classes[i] = subclasses[i % num];
3484 case QUAL_UNION_TYPE:
3485 /* Unions are similar to RECORD_TYPE but offset is always 0.
3487 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3489 if (TREE_CODE (field) == FIELD_DECL)
3493 if (TREE_TYPE (field) == error_mark_node)
3496 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3497 TREE_TYPE (field), subclasses,
3501 for (i = 0; i < num; i++)
3502 classes[i] = merge_classes (subclasses[i], classes[i]);
3511 /* Final merger cleanup. */
3512 for (i = 0; i < words; i++)
3514 /* If one class is MEMORY, everything should be passed in
3516 if (classes[i] == X86_64_MEMORY_CLASS)
3519 /* The X86_64_SSEUP_CLASS should be always preceded by
3520 X86_64_SSE_CLASS. */
3521 if (classes[i] == X86_64_SSEUP_CLASS
3522 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3523 classes[i] = X86_64_SSE_CLASS;
3525 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3526 if (classes[i] == X86_64_X87UP_CLASS
3527 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3528 classes[i] = X86_64_SSE_CLASS;
3533 /* Compute alignment needed. We align all types to natural boundaries with
3534 exception of XFmode that is aligned to 64bits. */
3535 if (mode != VOIDmode && mode != BLKmode)
3537 int mode_alignment = GET_MODE_BITSIZE (mode);
3540 mode_alignment = 128;
3541 else if (mode == XCmode)
3542 mode_alignment = 256;
3543 if (COMPLEX_MODE_P (mode))
3544 mode_alignment /= 2;
3545 /* Misaligned fields are always returned in memory. */
3546 if (bit_offset % mode_alignment)
3550 /* for V1xx modes, just use the base mode */
3551 if (VECTOR_MODE_P (mode)
3552 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3553 mode = GET_MODE_INNER (mode);
3555 /* Classification of atomic types. */
3560 classes[0] = X86_64_SSE_CLASS;
3563 classes[0] = X86_64_SSE_CLASS;
3564 classes[1] = X86_64_SSEUP_CLASS;
/* Integral modes: SI-or-smaller within the low 32 bits of an eightbyte
   classify as INTEGERSI, otherwise INTEGER.  */
3573 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3574 classes[0] = X86_64_INTEGERSI_CLASS;
3576 classes[0] = X86_64_INTEGER_CLASS;
3580 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3585 if (!(bit_offset % 64))
3586 classes[0] = X86_64_SSESF_CLASS;
3588 classes[0] = X86_64_SSE_CLASS;
3591 classes[0] = X86_64_SSEDF_CLASS;
3594 classes[0] = X86_64_X87_CLASS;
3595 classes[1] = X86_64_X87UP_CLASS;
3598 classes[0] = X86_64_SSE_CLASS;
3599 classes[1] = X86_64_SSEUP_CLASS;
3602 classes[0] = X86_64_SSE_CLASS;
3605 classes[0] = X86_64_SSEDF_CLASS;
3606 classes[1] = X86_64_SSEDF_CLASS;
3609 classes[0] = X86_64_COMPLEX_X87_CLASS;
3612 /* This modes is larger than 16 bytes. */
3620 classes[0] = X86_64_SSE_CLASS;
3621 classes[1] = X86_64_SSEUP_CLASS;
3627 classes[0] = X86_64_SSE_CLASS;
3633 gcc_assert (VECTOR_MODE_P (mode));
3638 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3640 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3641 classes[0] = X86_64_INTEGERSI_CLASS;
3643 classes[0] = X86_64_INTEGER_CLASS;
3644 classes[1] = X86_64_INTEGER_CLASS;
3645 return 1 + (bytes > 8);
3649 /* Examine the argument and return set number of register required in each
3650 class. Return 0 iff parameter should be passed in memory. */
3652 examine_argument (enum machine_mode mode, tree type, int in_return,
3653 int *int_nregs, int *sse_nregs)
3655 enum x86_64_reg_class regclass[MAX_CLASSES];
3656 int n = classify_argument (mode, type, regclass, 0);
/* Tally the integer and SSE register demand of each eightbyte.  */
3662 for (n--; n >= 0; n--)
3663 switch (regclass[n])
3665 case X86_64_INTEGER_CLASS:
3666 case X86_64_INTEGERSI_CLASS:
3669 case X86_64_SSE_CLASS:
3670 case X86_64_SSESF_CLASS:
3671 case X86_64_SSEDF_CLASS:
3674 case X86_64_NO_CLASS:
3675 case X86_64_SSEUP_CLASS:
/* x87 classes can only be used for return values, never arguments.  */
3677 case X86_64_X87_CLASS:
3678 case X86_64_X87UP_CLASS:
3682 case X86_64_COMPLEX_X87_CLASS:
3683 return in_return ? 2 : 0;
3684 case X86_64_MEMORY_CLASS:
3690 /* Construct container for the argument used by GCC interface. See
3691 FUNCTION_ARG for the detailed description. */
/* Builds the REG or PARALLEL rtx describing where a classified argument
   (or return value) lives.  NOTE(review): elided view — braces/returns
   are missing between many lines; comments only, code unchanged.  */
3694 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3695 tree type, int in_return, int nintregs, int nsseregs,
3696 const int *intreg, int sse_regno)
3698 /* The following variables hold the static issued_error state. */
/* static: each diagnostic is emitted at most once per compilation.  */
3699 static bool issued_sse_arg_error;
3700 static bool issued_sse_ret_error;
3701 static bool issued_x87_ret_error;
3703 enum machine_mode tmpmode;
3705 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3706 enum x86_64_reg_class regclass[MAX_CLASSES];
3710 int needed_sseregs, needed_intregs;
3711 rtx exp[MAX_CLASSES];
3714 n = classify_argument (mode, type, regclass, 0);
3717 if (!examine_argument (mode, type, in_return, &needed_intregs,
3720 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3723 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3724 some less clueful developer tries to use floating-point anyway. */
3725 if (needed_sseregs && !TARGET_SSE)
3729 if (!issued_sse_ret_error)
3731 error ("SSE register return with SSE disabled");
3732 issued_sse_ret_error = true;
3735 else if (!issued_sse_arg_error)
3737 error ("SSE register argument with SSE disabled");
3738 issued_sse_arg_error = true;
3743 /* Likewise, error if the ABI requires us to return values in the
3744 x87 registers and the user specified -mno-80387. */
3745 if (!TARGET_80387 && in_return)
3746 for (i = 0; i < n; i++)
3747 if (regclass[i] == X86_64_X87_CLASS
3748 || regclass[i] == X86_64_X87UP_CLASS
3749 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3751 if (!issued_x87_ret_error)
3753 error ("x87 register return with x87 disabled")
3754 issued_x87_ret_error = true;
3759 /* First construct simple cases. Avoid SCmode, since we want to use
3760 single register to pass this type. */
3761 if (n == 1 && mode != SCmode)
3762 switch (regclass[0])
3764 case X86_64_INTEGER_CLASS:
3765 case X86_64_INTEGERSI_CLASS:
3766 return gen_rtx_REG (mode, intreg[0]);
3767 case X86_64_SSE_CLASS:
3768 case X86_64_SSESF_CLASS:
3769 case X86_64_SSEDF_CLASS:
3770 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3771 case X86_64_X87_CLASS:
3772 case X86_64_COMPLEX_X87_CLASS:
3773 return gen_rtx_REG (mode, FIRST_STACK_REG);
3774 case X86_64_NO_CLASS:
3775 /* Zero sized array, struct or class. */
/* Two-eightbyte fast paths: a full SSE register, an x87 long double,
   or a pair of consecutive integer registers.  */
3780 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3781 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3782 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3785 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3786 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3787 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3788 && regclass[1] == X86_64_INTEGER_CLASS
3789 && (mode == CDImode || mode == TImode || mode == TFmode)
3790 && intreg[0] + 1 == intreg[1])
3791 return gen_rtx_REG (mode, intreg[0]);
3793 /* Otherwise figure out the entries of the PARALLEL. */
3794 for (i = 0; i < n; i++)
3796 switch (regclass[i])
3798 case X86_64_NO_CLASS:
3800 case X86_64_INTEGER_CLASS:
3801 case X86_64_INTEGERSI_CLASS:
3802 /* Merge TImodes on aligned occasions here too. */
3803 if (i * 8 + 8 > bytes)
3804 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3805 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3809 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3810 if (tmpmode == BLKmode)
3812 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3813 gen_rtx_REG (tmpmode, *intreg),
3817 case X86_64_SSESF_CLASS:
3818 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3819 gen_rtx_REG (SFmode,
3820 SSE_REGNO (sse_regno)),
3824 case X86_64_SSEDF_CLASS:
3825 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3826 gen_rtx_REG (DFmode,
3827 SSE_REGNO (sse_regno)),
3831 case X86_64_SSE_CLASS:
/* A following SSEUP eightbyte widens this entry to TImode.  */
3832 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
3836 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3837 gen_rtx_REG (tmpmode,
3838 SSE_REGNO (sse_regno)),
3840 if (tmpmode == TImode)
3849 /* Empty aligned struct, union or class. */
3853 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3854 for (i = 0; i < nexps; i++)
3855 XVECEXP (ret, 0, i) = exp [i];
3859 /* Update the data in CUM to advance over an argument of mode MODE
3860 and data type TYPE. (TYPE is null for libcalls where that information
3861 may not be available.) */
/* 32-bit variant.  NOTE(review): elided view — the mode switch labels
   separating the integer/SSE/MMX arms below are missing.  */
3864 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3865 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer arm: arguments consume whole words of the regparm pool.  */
3881 cum->words += words;
3882 cum->nregs -= words;
3883 cum->regno += words;
3885 if (cum->nregs <= 0)
3893 if (cum->float_in_sse < 2)
3896 if (cum->float_in_sse < 1)
/* SSE arm: only non-aggregates consume an SSE register.  */
3907 if (!type || !AGGREGATE_TYPE_P (type))
3909 cum->sse_words += words;
3910 cum->sse_nregs -= 1;
3911 cum->sse_regno += 1;
3912 if (cum->sse_nregs <= 0)
/* MMX arm: mirrors the SSE handling for 8-byte vectors.  */
3924 if (!type || !AGGREGATE_TYPE_P (type))
3926 cum->mmx_words += words;
3927 cum->mmx_nregs -= 1;
3928 cum->mmx_regno += 1;
3929 if (cum->mmx_nregs <= 0)
/* Advance CUM past one argument under the 64-bit SysV ABI: consume the
   classified integer/SSE registers if they all fit, otherwise the
   argument goes on the stack and only the word count advances.  */
3940 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3941 tree type, HOST_WIDE_INT words)
3943 int int_nregs, sse_nregs;
3945 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3946 cum->words += words;
3947 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3949 cum->nregs -= int_nregs;
3950 cum->sse_nregs -= sse_nregs;
3951 cum->regno += int_nregs;
3952 cum->sse_regno += sse_nregs;
3955 cum->words += words;
/* Advance CUM past one argument under the 64-bit MS ABI.  Anything not
   passable in a single 1/2/4/8-byte slot is passed indirectly, so the
   assertion documents the caller's precondition.  */
3959 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3960 HOST_WIDE_INT words)
3962 /* Otherwise, this should be passed indirect. */
3963 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3965 cum->words += words;
/* FUNCTION_ARG_ADVANCE dispatcher: compute size in bytes/words, switch
   vector types to their natural mode, then delegate to the ABI-specific
   helper (MS 64-bit, SysV 64-bit, or 32-bit).  */
3974 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3975 tree type, int named ATTRIBUTE_UNUSED)
3977 HOST_WIDE_INT bytes, words;
3979 if (mode == BLKmode)
3980 bytes = int_size_in_bytes (type);
3982 bytes = GET_MODE_SIZE (mode);
3983 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3986 mode = type_natural_mode (type);
3988 if (TARGET_64BIT_MS_ABI)
3989 function_arg_advance_ms_64 (cum, bytes, words);
3990 else if (TARGET_64BIT)
3991 function_arg_advance_64 (cum, mode, type, words);
3993 function_arg_advance_32 (cum, mode, type, bytes, words);
3996 /* Define where to put the arguments to a function.
3997 Value is zero to push the argument on the stack,
3998 or a hard register in which to store the argument.
4000 MODE is the argument's machine mode.
4001 TYPE is the data type of the argument (as a tree).
4002 This is null for libcalls where that information may
4004 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4005 the preceding args and about the function being called.
4006 NAMED is nonzero if this argument is a named parameter
4007 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided view — the mode switch labels separating the
   integer/SSE/MMX arms below are missing.  */
4010 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4011 enum machine_mode orig_mode, tree type,
4012 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* static: each ABI-mismatch warning is issued at most once.  */
4014 static bool warnedsse, warnedmmx;
4016 /* Avoid the AL settings for the Unix64 ABI. */
4017 if (mode == VOIDmode)
4033 if (words <= cum->nregs)
4035 int regno = cum->regno;
4037 /* Fastcall allocates the first two DWORD (SImode) or
4038 smaller arguments to ECX and EDX. */
4041 if (mode == BLKmode || mode == DImode)
4044 /* ECX not EAX is the first allocated register. */
4048 return gen_rtx_REG (mode, regno);
4053 if (cum->float_in_sse < 2)
4056 if (cum->float_in_sse < 1)
4066 if (!type || !AGGREGATE_TYPE_P (type))
4068 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4071 warning (0, "SSE vector argument without SSE enabled "
4075 return gen_reg_or_parallel (mode, orig_mode,
4076 cum->sse_regno + FIRST_SSE_REG);
4084 if (!type || !AGGREGATE_TYPE_P (type))
4086 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4089 warning (0, "MMX vector argument without MMX enabled "
4093 return gen_reg_or_parallel (mode, orig_mode,
4094 cum->mmx_regno + FIRST_MMX_REG);
/* 64-bit SysV FUNCTION_ARG: delegate the real work to
   construct_container after handling the hidden %al register count.  */
4103 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4104 enum machine_mode orig_mode, tree type)
4106 /* Handle a hidden AL argument containing number of registers
4107 for varargs x86-64 functions. */
4108 if (mode == VOIDmode)
4109 return GEN_INT (cum->maybe_vaarg
4110 ? (cum->sse_nregs < 0
4115 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4117 &x86_64_int_parameter_registers [cum->regno],
/* 64-bit MS-ABI FUNCTION_ARG.  Integer args go in the MS parameter
   registers; SF/DFmode go in SSE registers, and unnamed float args are
   duplicated into both register files for the benefit of varargs.  */
4122 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4123 enum machine_mode orig_mode, int named)
4127 /* Avoid the AL settings for the Unix64 ABI. */
4128 if (mode == VOIDmode)
4131 /* If we've run out of registers, it goes on the stack. */
4132 if (cum->nregs == 0)
4135 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4137 /* Only floating point modes are passed in anything but integer regs. */
4138 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4141 regno = cum->regno + FIRST_SSE_REG;
4146 /* Unnamed floating parameters are passed in both the
4147 SSE and integer registers. */
4148 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4149 t2 = gen_rtx_REG (mode, regno);
4150 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4151 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4152 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4156 return gen_reg_or_parallel (mode, orig_mode, regno);
/* FUNCTION_ARG dispatcher: compute size, normalize vector types to
   their natural mode, then delegate to the ABI-specific helper.
   Mirrors the structure of function_arg_advance above.  */
4160 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4161 tree type, int named)
4163 enum machine_mode mode = omode;
4164 HOST_WIDE_INT bytes, words;
4166 if (mode == BLKmode)
4167 bytes = int_size_in_bytes (type);
4169 bytes = GET_MODE_SIZE (mode);
4170 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4172 /* To simplify the code below, represent vector types with a vector mode
4173 even if MMX/SSE are not active. */
4174 if (type && TREE_CODE (type) == VECTOR_TYPE)
4175 mode = type_natural_mode (type);
4177 if (TARGET_64BIT_MS_ABI)
4178 return function_arg_ms_64 (cum, mode, omode, named);
4179 else if (TARGET_64BIT)
4180 return function_arg_64 (cum, mode, omode, type);
4182 return function_arg_32 (cum, mode, omode, type, bytes, words);
4185 /* A C expression that indicates when an argument must be passed by
4186 reference. If nonzero for an argument, a copy of that argument is
4187 made in memory and a pointer to the argument is passed instead of
4188 the argument itself. The pointer is passed in whatever way is
4189 appropriate for passing a pointer to that type. */
4192 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4193 enum machine_mode mode ATTRIBUTE_UNUSED,
4194 tree type, bool named ATTRIBUTE_UNUSED)
4196 if (TARGET_64BIT_MS_ABI)
4200 /* Arrays are passed by reference. */
4201 if (TREE_CODE (type) == ARRAY_TYPE)
4204 if (AGGREGATE_TYPE_P (type))
4206 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4207 are passed by reference. */
/* exact_log2 yields 0..3 exactly for sizes 1, 2, 4, 8 bytes.  */
4208 int el2 = exact_log2 (int_size_in_bytes (type));
4209 return !(el2 >= 0 && el2 <= 3);
4213 /* __m128 is passed by reference. */
4214 /* ??? How to handle complex? For now treat them as structs,
4215 and pass them by reference if they're too large. */
4216 if (GET_MODE_SIZE (mode) > 8)
/* SysV 64-bit: variable-sized types (int_size_in_bytes == -1) are
   passed by reference.  */
4219 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4225 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4226 ABI. Only called if TARGET_SSE. */
/* Recursively checks aggregates for embedded 128-bit-aligned vector
   members.  NOTE(review): several case labels and returns are elided in
   this extract.  */
4228 contains_128bit_aligned_vector_p (tree type)
4230 enum machine_mode mode = TYPE_MODE (type);
/* An SSE vector mode itself triggers the alignment requirement, unless
   the user explicitly lowered the alignment below 128 bits.  */
4231 if (SSE_REG_MODE_P (mode)
4232 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4234 if (TYPE_ALIGN (type) < 128)
4237 if (AGGREGATE_TYPE_P (type))
4239 /* Walk the aggregates recursively. */
4240 switch (TREE_CODE (type))
4244 case QUAL_UNION_TYPE:
4248 /* Walk all the structure fields. */
4249 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4251 if (TREE_CODE (field) == FIELD_DECL
4252 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4259 /* Just for use if some languages passes arrays by value. */
4260 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4271 /* Gives the alignment boundary, in bits, of an argument with the
4272 specified mode and type. */
4275 ix86_function_arg_boundary (enum machine_mode mode, tree type)
/* Prefer the type's alignment when a type is available, otherwise the
   mode's; never go below PARM_BOUNDARY.  NOTE(review): the surrounding
   if/else and 64-bit path are partially elided in this extract.  */
4279 align = TYPE_ALIGN (type);
4281 align = GET_MODE_ALIGNMENT (mode);
4282 if (align < PARM_BOUNDARY)
4283 align = PARM_BOUNDARY;
4286 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4287 make an exception for SSE modes since these require 128bit
4290 The handling here differs from field_alignment. ICC aligns MMX
4291 arguments to 4 byte boundaries, while structure fields are aligned
4292 to 8 byte boundaries. */
4294 align = PARM_BOUNDARY;
/* Non-SSE modes fall back to the base 4-byte parameter boundary.  */
4297 if (!SSE_REG_MODE_P (mode))
4298 align = PARM_BOUNDARY;
/* Aggregates only keep the higher alignment if they actually contain a
   128-bit aligned vector member.  */
4302 if (!contains_128bit_aligned_vector_p (type))
4303 align = PARM_BOUNDARY;
4311 /* Return true if N is a possible register number of function value. */
/* NOTE(review): the switch head and other case labels are elided in this
   extract; only the FP/MMX-related cases are visible.  */
4314 ix86_function_value_regno_p (int regno)
4321 case FIRST_FLOAT_REG:
/* The MS 64-bit ABI never returns values in x87 registers.  */
4322 if (TARGET_64BIT_MS_ABI)
4324 return TARGET_FLOAT_RETURNS_IN_80387;
4330 if (TARGET_MACHO || TARGET_64BIT)
4338 /* Define how to find the value returned by a function.
4339 VALTYPE is the data type of the value (as a tree).
4340 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4341 otherwise, FUNC is 0. */
/* 32-bit return-value register selection: MMX vectors in %mm0, SSE
   vectors/TImode in %xmm0, x87 floats in %st(0), everything else in
   %eax — with an sseregparm/SSE-math override for SF/DFmode.  */
4344 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4345 tree fntype, tree fn)
4349 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4350 we normally prevent this case when mmx is not available. However
4351 some ABIs may require the result to be returned like DImode. */
4352 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4353 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4355 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4356 we prevent this case when sse is not available. However some ABIs
4357 may require the result to be returned like integer TImode. */
4358 else if (mode == TImode
4359 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4360 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4362 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4363 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4364 regno = FIRST_FLOAT_REG;
4366 /* Most things go in %eax. */
4369 /* Override FP return register with %xmm0 for local functions when
4370 SSE math is enabled or for functions with sseregparm attribute. */
4371 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4373 int sse_level = ix86_function_sseregparm (fntype, fn);
/* sse_level 1 covers SFmode only; level 2 covers DFmode as well.  */
4374 if ((sse_level >= 1 && mode == SFmode)
4375 || (sse_level == 2 && mode == DFmode))
4376 regno = FIRST_SSE_REG;
4379 return gen_rtx_REG (orig_mode, regno);
/* SysV 64-bit return-value lowering.  Libcalls (no VALTYPE) pick a
   register directly; otherwise construct_container performs full
   classification.  NOTE(review): the mode tests guarding the early
   gen_rtx_REG returns are elided in this extract.  */
4383 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4388 /* Handle libcalls, which don't provide a type node. */
4389 if (valtype == NULL)
4401 return gen_rtx_REG (mode, FIRST_SSE_REG);
4404 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4408 return gen_rtx_REG (mode, 0);
4412 ret = construct_container (mode, orig_mode, valtype, 1,
4413 REGPARM_MAX, SSE_REGPARM_MAX,
4414 x86_64_int_return_registers, 0);
4416 /* For zero sized structures, construct_container returns NULL, but we
4417 need to keep rest of compiler happy by returning meaningful value. */
4419 ret = gen_rtx_REG (orig_mode, 0);
/* MS 64-bit ABI return values: SF/DFmode and 16-byte vectors in %xmm0,
   everything else defaults to register 0 (%rax).  */
4425 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4427 unsigned int regno = 0;
4431 if (mode == SFmode || mode == DFmode)
4432 regno = FIRST_SSE_REG;
4433 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4434 regno = FIRST_SSE_REG;
4437 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   normalize FNTYPE_OR_DECL into a decl/type pair and dispatch on ABI.  */
4441 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4442 enum machine_mode orig_mode, enum machine_mode mode)
4447 if (fntype_or_decl && DECL_P (fntype_or_decl))
4448 fn = fntype_or_decl;
4449 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4451 if (TARGET_64BIT_MS_ABI)
4452 return function_value_ms_64 (orig_mode, mode);
4453 else if (TARGET_64BIT)
4454 return function_value_64 (orig_mode, mode, valtype);
4456 return function_value_32 (orig_mode, mode, fntype, fn);
/* Implement FUNCTION_VALUE: where a function's return value of VALTYPE
   lives.  Uses the type's natural mode for classification but keeps the
   original TYPE_MODE for the resulting rtx.  */
4460 ix86_function_value (tree valtype, tree fntype_or_decl,
4461 bool outgoing ATTRIBUTE_UNUSED)
4463 enum machine_mode mode, orig_mode;
4465 orig_mode = TYPE_MODE (valtype);
4466 mode = type_natural_mode (valtype);
4467 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Implement LIBCALL_VALUE: libcalls have no type node, so pass NULL and
   use MODE for both the classification and the result mode.  */
4471 ix86_libcall_value (enum machine_mode mode)
4473 return ix86_function_value_1 (NULL, NULL, mode, mode);
4476 /* Return true iff type is returned in memory. */
/* 32-bit variant.  NOTE(review): some size tests and returns between the
   visible lines are elided in this extract.  */
4479 return_in_memory_32 (tree type, enum machine_mode mode)
4483 if (mode == BLKmode)
4486 size = int_size_in_bytes (type);
/* Some targets (MS_AGGREGATE_RETURN) return small aggregates in
   registers.  */
4488 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4491 if (VECTOR_MODE_P (mode) || mode == TImode)
4493 /* User-created vectors small enough to fit in EAX. */
4497 /* MMX/3dNow values are returned in MM0,
4498 except when it doesn't exits. */
4500 return (TARGET_MMX ? 0 : 1);
4502 /* SSE values are returned in XMM0, except when it doesn't exist. */
4504 return (TARGET_SSE ? 0 : 1);
/* SysV 64-bit: return in memory exactly when the classification in
   examine_argument says the value does not fit in registers.  */
4519 return_in_memory_64 (tree type, enum machine_mode mode)
4521 int needed_intregs, needed_sseregs;
4522 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* MS 64-bit: only 16-byte vectors and values of size 1/2/4/8 bytes are
   returned in registers; everything else goes through memory.  */
4526 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4528 HOST_WIDE_INT size = int_size_in_bytes (type);
4530 /* __m128 and friends are returned in xmm0. */
4531 if (size == 16 && VECTOR_MODE_P (mode))
4534 /* Otherwise, the size must be exactly in [1248]. */
4535 return (size != 1 && size != 2 && size != 4 && size != 8);
/* Implement TARGET_RETURN_IN_MEMORY: dispatch on ABI, classifying TYPE
   by its natural mode.  */
4539 ix86_return_in_memory (tree type)
4541 enum machine_mode mode = type_natural_mode (type);
4543 if (TARGET_64BIT_MS_ABI)
4544 return return_in_memory_ms_64 (type, mode);
4545 else if (TARGET_64BIT)
4546 return return_in_memory_64 (type, mode);
4548 return return_in_memory_32 (type, mode);
4551 /* Return false iff TYPE is returned in memory. This version is used
4552 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4553 but differs notably in that when MMX is available, 8-byte vectors
4554 are returned in memory, rather than in MMX registers. */
/* NOTE(review): several returns/branches between the visible lines are
   elided in this extract; compare against full source.  */
4557 ix86_sol10_return_in_memory (tree type)
4560 enum machine_mode mode = type_natural_mode (type);
4563 return return_in_memory_64 (type, mode);
4565 if (mode == BLKmode)
4568 size = int_size_in_bytes (type);
4570 if (VECTOR_MODE_P (mode))
4572 /* Return in memory only if MMX registers *are* available. This
4573 seems backwards, but it is consistent with the existing
4580 else if (mode == TImode)
4582 else if (mode == XFmode)
4588 /* When returning SSE vector types, we have a choice of either
4589 (1) being abi incompatible with a -march switch, or
4590 (2) generating an error.
4591 Given no good solution, I think the safest thing is one warning.
4592 The user won't be able to use -Werror, but....
4594 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4595 called in response to actually generating a caller or callee that
4596 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4597 via aggregate_value_p for general type probing from tree-ssa. */
4600 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* Static flags ensure each warning is emitted at most once per
   compilation.  */
4602 static bool warnedsse, warnedmmx;
4604 if (!TARGET_64BIT && type)
4606 /* Look at the return type of the function, not the function type. */
4607 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4609 if (!TARGET_SSE && !warnedsse)
4612 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4615 warning (0, "SSE vector return without SSE enabled "
4620 if (!TARGET_MMX && !warnedmmx)
4622 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4625 warning (0, "MMX vector return without MMX enabled "
4635 /* Create the va_list data type. */
/* For SysV x86-64 this is the four-field __va_list_tag record
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area) wrapped in a
   one-element array; 32-bit and MS-64 use a plain char pointer.  */
4638 ix86_build_builtin_va_list (void)
4640 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4642 /* For i386 we use plain pointer to argument area. */
4643 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4644 return build_pointer_type (char_type_node);
4646 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4647 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4649 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4650 unsigned_type_node);
4651 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4652 unsigned_type_node);
4653 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4655 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the va_list-size optimization can
   track how much of the save area is actually used.  */
4658 va_list_gpr_counter_field = f_gpr;
4659 va_list_fpr_counter_field = f_fpr;
4661 DECL_FIELD_CONTEXT (f_gpr) = record;
4662 DECL_FIELD_CONTEXT (f_fpr) = record;
4663 DECL_FIELD_CONTEXT (f_ovf) = record;
4664 DECL_FIELD_CONTEXT (f_sav) = record;
4666 TREE_CHAIN (record) = type_decl;
4667 TYPE_NAME (record) = type_decl;
4668 TYPE_FIELDS (record) = f_gpr;
4669 TREE_CHAIN (f_gpr) = f_fpr;
4670 TREE_CHAIN (f_fpr) = f_ovf;
4671 TREE_CHAIN (f_ovf) = f_sav;
4673 layout_type (record);
4675 /* The correct type is an array type of one element. */
4676 return build_array_type (record, build_index_type (size_zero_node));
4679 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* SysV 64-bit: spill the unnamed-argument GP registers into the register
   save area, then emit the sse_prologue_save computed-jump sequence to
   save only the SSE registers actually used (%al carries the count).
   NOTE(review): some declarations and braces are elided in this
   extract.  */
4682 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
/* Nothing to do when va_list analysis proved no registers are read.  */
4692 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4695 /* Indicate to allocate space on the stack for varargs save area. */
4696 ix86_save_varrargs_registers = 1;
4697 /* We need 16-byte stack alignment to save SSE registers. If user
4698 asked for lower preferred_stack_boundary, lets just hope that he knows
4699 what he is doing and won't varargs SSE values.
4701 We also may end up assuming that only 64bit values are stored in SSE
4702 register let some floating point program work. */
4703 if (ix86_preferred_stack_boundary >= 128)
4704 cfun->stack_alignment_needed = 128;
4706 save_area = frame_pointer_rtx;
4707 set = get_varargs_alias_set ();
/* Save the remaining integer parameter registers, bounded by how much
   of the save area va_list analysis says is needed.  */
4709 for (i = cum->regno;
4711 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4714 mem = gen_rtx_MEM (Pmode,
4715 plus_constant (save_area, i * UNITS_PER_WORD));
4716 MEM_NOTRAP_P (mem) = 1;
4717 set_mem_alias_set (mem, set);
4718 emit_move_insn (mem, gen_rtx_REG (Pmode,
4719 x86_64_int_parameter_registers[i]));
4722 if (cum->sse_nregs && cfun->va_list_fpr_size)
4724 /* Now emit code to save SSE registers. The AX parameter contains number
4725 of SSE parameter registers used to call this function. We use
4726 sse_prologue_save insn template that produces computed jump across
4727 SSE saves. We need some preparation work to get this working. */
4729 label = gen_label_rtx ();
4730 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4732 /* Compute address to jump to :
4733 label - 5*eax + nnamed_sse_arguments*5 */
4734 tmp_reg = gen_reg_rtx (Pmode);
4735 nsse_reg = gen_reg_rtx (Pmode);
4736 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4737 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4738 gen_rtx_MULT (Pmode, nsse_reg,
4743 gen_rtx_CONST (DImode,
4744 gen_rtx_PLUS (DImode,
4746 GEN_INT (cum->sse_regno * 4))));
4748 emit_move_insn (nsse_reg, label_ref);
4749 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4751 /* Compute address of memory block we save into. We always use pointer
4752 pointing 127 bytes after first byte to store - this is needed to keep
4753 instruction size limited by 4 bytes. */
4754 tmp_reg = gen_reg_rtx (Pmode);
4755 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4756 plus_constant (save_area,
4757 8 * REGPARM_MAX + 127)));
4758 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4759 MEM_NOTRAP_P (mem) = 1;
4760 set_mem_alias_set (mem, set);
4761 set_mem_align (mem, BITS_PER_WORD);
4763 /* And finally do the dirty job! */
4764 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4765 GEN_INT (cum->sse_regno), label));
/* MS 64-bit variant: spill each remaining parameter register into its
   home slot in the caller-allocated shadow space above the incoming
   arguments; no separate register save area or SSE saving is needed.  */
4770 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4772 alias_set_type set = get_varargs_alias_set ();
4775 for (i = cum->regno; i < REGPARM_MAX; i++)
4779 mem = gen_rtx_MEM (Pmode,
4780 plus_constant (virtual_incoming_args_rtx,
4781 i * UNITS_PER_WORD));
4782 MEM_NOTRAP_P (mem) = 1;
4783 set_mem_alias_set (mem, set);
4785 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4786 emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: advance past the last named
   argument (for stdarg functions) and dispatch to the per-ABI worker.
   NOTE(review): the 32-bit early return and next_cum initialization are
   elided in this extract.  */
4791 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4792 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4795 CUMULATIVE_ARGS next_cum;
4799 /* This argument doesn't appear to be used anymore. Which is good,
4800 because the old code here didn't suppress rtl generation. */
4801 gcc_assert (!no_rtl);
4806 fntype = TREE_TYPE (current_function_decl);
/* stdarg_p: the prototype's last named argument is not void, i.e. a
   real named parameter precedes the "...".  */
4807 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4808 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4809 != void_type_node));
4811 /* For varargs, we do not want to skip the dummy va_dcl argument.
4812 For stdargs, we do want to skip the last named argument. */
4815 function_arg_advance (&next_cum, mode, type, 1);
4817 if (TARGET_64BIT_MS_ABI)
4818 setup_incoming_varargs_ms_64 (&next_cum);
4820 setup_incoming_varargs_64 (&next_cum);
4823 /* Implement va_start. */
/* SysV 64-bit: initialize the four va_list fields from the
   CUMULATIVE_ARGS counts recorded for the current function; 32-bit and
   MS-64 use the generic pointer-based expansion.  */
4826 ix86_va_start (tree valist, rtx nextarg)
4828 HOST_WIDE_INT words, n_gpr, n_fpr;
4829 tree f_gpr, f_fpr, f_ovf, f_sav;
4830 tree gpr, fpr, ovf, sav, t;
4833 /* Only 64bit target needs something special. */
4834 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4836 std_expand_builtin_va_start (valist, nextarg);
/* Walk the __va_list_tag fields in declaration order (see
   ix86_build_builtin_va_list).  */
4840 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4841 f_fpr = TREE_CHAIN (f_gpr);
4842 f_ovf = TREE_CHAIN (f_fpr);
4843 f_sav = TREE_CHAIN (f_ovf);
4845 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4846 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4847 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4848 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4849 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4851 /* Count number of gp and fp argument registers used. */
4852 words = current_function_args_info.words;
4853 n_gpr = current_function_args_info.regno;
4854 n_fpr = current_function_args_info.sse_regno;
4856 if (cfun->va_list_gpr_size)
/* gp_offset = n_gpr * 8: byte offset of the first unnamed GP register
   within the register save area.  */
4858 type = TREE_TYPE (gpr);
4859 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4860 build_int_cst (type, n_gpr * 8));
4861 TREE_SIDE_EFFECTS (t) = 1;
4862 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4865 if (cfun->va_list_fpr_size)
/* fp_offset: SSE slots are 16 bytes each and follow the 8*REGPARM_MAX
   bytes of GP slots.  */
4867 type = TREE_TYPE (fpr);
4868 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4869 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4870 TREE_SIDE_EFFECTS (t) = 1;
4871 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4874 /* Find the overflow area. */
4875 type = TREE_TYPE (ovf);
4876 t = make_tree (type, virtual_incoming_args_rtx);
4878 t = build2 (POINTER_PLUS_EXPR, type, t,
4879 size_int (words * UNITS_PER_WORD));
4880 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4881 TREE_SIDE_EFFECTS (t) = 1;
4882 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4884 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4886 /* Find the register save area.
4887 Prologue of the function save it right above stack frame. */
4888 type = TREE_TYPE (sav);
4889 t = make_tree (type, frame_pointer_rtx);
4890 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4891 TREE_SIDE_EFFECTS (t) = 1;
4892 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4896 /* Implement va_arg. */
/* SysV 64-bit va_arg gimplification: try the register save area first
   (with a runtime bounds check on gp_offset/fp_offset), fall back to
   the overflow area, and reassemble multi-register aggregates through a
   temporary when pieces are not consecutive in the save area.
   NOTE(review): this extract elides numerous lines (braces, labels and
   some statements); the structure below is partial.  */
4899 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4901 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4902 tree f_gpr, f_fpr, f_ovf, f_sav;
4903 tree gpr, fpr, ovf, sav, t;
4905 tree lab_false, lab_over = NULL_TREE;
4910 enum machine_mode nat_mode;
4912 /* Only 64bit target needs something special. */
4913 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4914 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4916 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4917 f_fpr = TREE_CHAIN (f_gpr);
4918 f_ovf = TREE_CHAIN (f_fpr);
4919 f_sav = TREE_CHAIN (f_ovf);
4921 valist = build_va_arg_indirect_ref (valist);
4922 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4923 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4924 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4925 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments are fetched as pointers and dereferenced
   at the end.  */
4927 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4929 type = build_pointer_type (type);
4930 size = int_size_in_bytes (type);
4931 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4933 nat_mode = type_natural_mode (type);
4934 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4935 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4937 /* Pull the value out of the saved registers. */
4939 addr = create_tmp_var (ptr_type_node, "addr");
4940 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4944 int needed_intregs, needed_sseregs;
4946 tree int_addr, sse_addr;
4948 lab_false = create_artificial_label ();
4949 lab_over = create_artificial_label ();
4951 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when alignment exceeds what the save area
   guarantees, or (below) when register pieces are not consecutive.  */
4953 need_temp = (!REG_P (container)
4954 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4955 || TYPE_ALIGN (type) > 128));
4957 /* In case we are passing structure, verify that it is consecutive block
4958 on the register save area. If not we need to do moves. */
4959 if (!need_temp && !REG_P (container))
4961 /* Verify that all registers are strictly consecutive */
4962 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4966 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4968 rtx slot = XVECEXP (container, 0, i);
4969 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4970 || INTVAL (XEXP (slot, 1)) != i * 16)
4978 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4980 rtx slot = XVECEXP (container, 0, i);
4981 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4982 || INTVAL (XEXP (slot, 1)) != i * 8)
4994 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4995 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4996 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4997 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5000 /* First ensure that we fit completely in registers. */
/* If gp_offset is past the last slot that still leaves room for
   needed_intregs registers, jump to the overflow-area path.  */
5003 t = build_int_cst (TREE_TYPE (gpr),
5004 (REGPARM_MAX - needed_intregs + 1) * 8);
5005 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5006 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5007 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5008 gimplify_and_add (t, pre_p);
5012 t = build_int_cst (TREE_TYPE (fpr),
5013 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5015 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5016 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5017 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5018 gimplify_and_add (t, pre_p);
5021 /* Compute index to start of area used for integer regs. */
5024 /* int_addr = gpr + sav; */
5025 t = fold_convert (sizetype, gpr);
5026 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5027 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5028 gimplify_and_add (t, pre_p);
5032 /* sse_addr = fpr + sav; */
5033 t = fold_convert (sizetype, fpr);
5034 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5035 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5036 gimplify_and_add (t, pre_p);
/* need_temp path: copy each register-sized piece from the save area
   into a stack temporary and return the temporary's address.  */
5041 tree temp = create_tmp_var (type, "va_arg_tmp");
5044 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5045 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5046 gimplify_and_add (t, pre_p);
5048 for (i = 0; i < XVECLEN (container, 0); i++)
5050 rtx slot = XVECEXP (container, 0, i);
5051 rtx reg = XEXP (slot, 0);
5052 enum machine_mode mode = GET_MODE (reg);
5053 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5054 tree addr_type = build_pointer_type (piece_type);
5057 tree dest_addr, dest;
5059 if (SSE_REGNO_P (REGNO (reg)))
5061 src_addr = sse_addr;
5062 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5066 src_addr = int_addr;
5067 src_offset = REGNO (reg) * 8;
5069 src_addr = fold_convert (addr_type, src_addr);
5070 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5071 size_int (src_offset));
5072 src = build_va_arg_indirect_ref (src_addr);
5074 dest_addr = fold_convert (addr_type, addr);
5075 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5076 size_int (INTVAL (XEXP (slot, 1))));
5077 dest = build_va_arg_indirect_ref (dest_addr);
5079 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5080 gimplify_and_add (t, pre_p);
/* Bump the offsets past the registers we just consumed.  */
5086 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5087 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5088 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5089 gimplify_and_add (t, pre_p);
5093 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5094 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5095 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5096 gimplify_and_add (t, pre_p);
5099 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5100 gimplify_and_add (t, pre_p);
5102 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5103 append_to_statement_list (t, pre_p);
5106 /* ... otherwise out of the overflow area. */
5108 /* Care for on-stack alignment if needed. */
5109 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5110 || integer_zerop (TYPE_SIZE (type)))
/* Round the overflow pointer up to the argument's boundary:
   ovf = (ovf + align - 1) & -align.  */
5114 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5115 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5116 size_int (align - 1));
5117 t = fold_convert (sizetype, t);
5118 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5120 t = fold_convert (TREE_TYPE (ovf), t);
5122 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5124 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5125 gimplify_and_add (t2, pre_p);
5127 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5128 size_int (rsize * UNITS_PER_WORD));
5129 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5130 gimplify_and_add (t, pre_p);
5134 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5135 append_to_statement_list (t, pre_p);
5138 ptrtype = build_pointer_type (type);
5139 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference arguments an extra dereference is required.  */
5142 addr = build_va_arg_indirect_ref (addr);
5143 return build_va_arg_indirect_ref (addr);
5146 /* Return nonzero if OPNUM's MEM should be matched
5147 in movabs* patterns. */
/* Strip any PARALLEL/SUBREG wrapping, then reject volatile MEMs unless
   volatile_ok is in effect.  */
5150 ix86_check_movabs (rtx insn, int opnum)
5154 set = PATTERN (insn);
5155 if (GET_CODE (set) == PARALLEL)
5156 set = XVECEXP (set, 0, 0);
5157 gcc_assert (GET_CODE (set) == SET);
5158 mem = XEXP (set, opnum);
5159 while (GET_CODE (mem) == SUBREG)
5160 mem = SUBREG_REG (mem);
5161 gcc_assert (MEM_P (mem));
5162 return (volatile_ok || !MEM_VOLATILE_P (mem));
5165 /* Initialize the table of extra 80387 mathematical constants. */
/* Parses the decimal strings for log10(2), ln(2), log2(e), log2(10) and
   pi — the constants loadable by fldlg2/fldln2/fldl2e/fldl2t/fldpi —
   into ext_80387_constants_table, rounded to XFmode.  Idempotent via
   ext_80387_constants_init.  */
5168 init_ext_80387_constants (void)
5170 static const char * cst[5] =
5172 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5173 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5174 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5175 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5176 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5180 for (i = 0; i < 5; i++)
5182 real_from_string (&ext_80387_constants_table[i], cst[i]);
5183 /* Ensure each constant is rounded to XFmode precision. */
5184 real_convert (&ext_80387_constants_table[i],
5185 XFmode, &ext_80387_constants_table[i]);
5188 ext_80387_constants_init = 1;
5191 /* Return true if the constant is something that can be loaded with
5192 a special instruction. */
/* Returns an index identifying which special x87 load handles X, or a
   rejecting value for others.  NOTE(review): the exact return codes and
   some returns are elided in this extract — see
   standard_80387_constant_opcode for the consumer.  */
5195 standard_80387_constant_p (rtx x)
5197 enum machine_mode mode = GET_MODE (x);
5201 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
/* 0.0 and 1.0 map to fldz/fld1.  */
5204 if (x == CONST0_RTX (mode))
5206 if (x == CONST1_RTX (mode))
5209 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5211 /* For XFmode constants, try to find a special 80387 instruction when
5212 optimizing for size or on those CPUs that benefit from them. */
5214 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5218 if (! ext_80387_constants_init)
5219 init_ext_80387_constants ();
5221 for (i = 0; i < 5; i++)
5222 if (real_identical (&r, &ext_80387_constants_table[i]))
5226 /* Load of the constant -0.0 or -1.0 will be split as
5227 fldz;fchs or fld1;fchs sequence. */
5228 if (real_isnegzero (&r))
5230 if (real_identical (&r, &dconstm1))
5236 /* Return the opcode of the special instruction to be used to load
/* Maps the index from standard_80387_constant_p to its mnemonic string.
   NOTE(review): the case labels/strings are elided in this extract.  */
5240 standard_80387_constant_opcode (rtx x)
5242 switch (standard_80387_constant_p (x))
5266 /* Return the CONST_DOUBLE representing the 80387 constant that is
5267 loaded by the specified special instruction. The argument IDX
5268 matches the return value from standard_80387_constant_p. */
/* NOTE(review): the idx-to-table-slot mapping is elided in this
   extract.  */
5271 standard_80387_constant_rtx (int idx)
5275 if (! ext_80387_constants_init)
5276 init_ext_80387_constants ();
5292 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5296 /* Return 1 if mode is a valid mode for sse. */
/* NOTE(review): the switch over modes is elided in this extract.  */
5298 standard_sse_mode_p (enum machine_mode mode)
5315 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Return codes consumed by standard_sse_constant_opcode: all-zeros
   constants load via xorps/xorpd/pxor; all-ones (SSE2) via pcmpeqd.  */
5318 standard_sse_constant_p (rtx x)
5320 enum machine_mode mode = GET_MODE (x);
5322 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5324 if (vector_all_ones_operand (x, mode)
5325 && standard_sse_mode_p (mode))
5326 return TARGET_SSE2 ? 2 : -1;
5331 /* Return the opcode of the special instruction to be used to load
/* Chooses the zeroing idiom by the insn's attribute mode so the result
   stays in the right SSE domain (float vs double vs integer).  */
5335 standard_sse_constant_opcode (rtx insn, rtx x)
5337 switch (standard_sse_constant_p (x))
5340 if (get_attr_mode (insn) == MODE_V4SF)
5341 return "xorps\t%0, %0";
5342 else if (get_attr_mode (insn) == MODE_V2DF)
5343 return "xorpd\t%0, %0";
5345 return "pxor\t%0, %0";
5347 return "pcmpeqd\t%0, %0";
5352 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over the rtx: direct hit on SYMBOL_REF/LABEL_REF,
   otherwise descend into 'e' (expression) and 'E' (vector) operands per
   the rtx format string.  */
5355 symbolic_reference_mentioned_p (rtx op)
5360 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5363 fmt = GET_RTX_FORMAT (GET_CODE (op));
5364 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5370 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5371 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5375 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5382 /* Return 1 if it is appropriate to emit `ret' instructions in the
5383 body of a function. Do this only if the epilogue is simple, needing a
5384 couple of insns. Prior to reloading, we can't tell how many registers
5385 must be saved, so return 0 then. Return 0 if there is no frame
5386 marker to de-allocate. */
5389 ix86_can_use_return_insn_p (void)
5391 struct ix86_frame frame;
5393 if (! reload_completed || frame_pointer_needed)
5396 /* Don't allow more than 32 pop, since that's all we can do
5397 with one instruction. */
/* `ret imm16' is limited to a 16-bit immediate; 32768 exceeds it once
   the return address is included.  */
5398 if (current_function_pops_args
5399 && current_function_args_size >= 32768)
5402 ix86_compute_frame_layout (&frame);
/* A bare `ret' only works when nothing needs deallocating/restoring.  */
5403 return frame.to_allocate == 0 && frame.nregs == 0;
5406 /* Value should be nonzero if functions must have frame pointers.
5407 Zero means the frame pointer need not be set up (and parms may
5408 be accessed via the stack pointer) in functions that seem suitable. */
5411 ix86_frame_pointer_required (void)
5413 /* If we accessed previous frames, then the generated code expects
5414 to be able to access the saved ebp value in our frame. */
5415 if (cfun->machine->accesses_prev_frame)
5418 /* Several x86 os'es need a frame pointer for other reasons,
5419 usually pertaining to setjmp. */
5420 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5423 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5424 the frame pointer by default. Turn it back on now if we've not
5425 got a leaf function. */
/* TLS descriptor calls count against leafness for this purpose.  */
5426 if (TARGET_OMIT_LEAF_FRAME_POINTER
5427 && (!current_function_is_leaf
5428 || ix86_current_function_calls_tls_descriptor))
/* Profiling also forces a frame pointer.  */
5431 if (current_function_profile)
5437 /* Record that the current function accesses previous call frames. */
/* Consulted by ix86_frame_pointer_required to force a frame pointer.  */
5440 ix86_setup_frame_addresses (void)
5442 cfun->machine->accesses_prev_frame = 1;
5445 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5446 # define USE_HIDDEN_LINKONCE 1
5448 # define USE_HIDDEN_LINKONCE 0
5451 static int pic_labels_used;
5453 /* Fills in the label name that should be used for a pc thunk for
5454 the given register. */
/* 32-bit only: hidden-linkonce targets get the well-known
   "__i686.get_pc_thunk.<reg>" name; others use an internal label.  */
5457 get_pc_thunk_name (char name[32], unsigned int regno)
5459 gcc_assert (!TARGET_64BIT);
5461 if (USE_HIDDEN_LINKONCE)
5462 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5464 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5468 /* This function generates code for -fpic that loads %ebx with
5469 the return address of the caller and then returns. */
/* TARGET_ASM_FILE_END hook: emit one get-pc thunk per register recorded
   in pic_labels_used, choosing a Mach-O coalesced section, a hidden
   linkonce section, or plain text section per target.  NOTE(review):
   some lines (declarations, #ifdef boundaries) are elided in this
   extract.  */
5472 ix86_file_end (void)
5477 for (regno = 0; regno < 8; ++regno)
/* Skip registers no thunk was requested for.  */
5481 if (! ((pic_labels_used >> regno) & 1))
5484 get_pc_thunk_name (name, regno);
5489 switch_to_section (darwin_sections[text_coal_section]);
5490 fputs ("\t.weak_definition\t", asm_out_file);
5491 assemble_name (asm_out_file, name);
5492 fputs ("\n\t.private_extern\t", asm_out_file);
5493 assemble_name (asm_out_file, name);
5494 fputs ("\n", asm_out_file);
5495 ASM_OUTPUT_LABEL (asm_out_file, name);
5499 if (USE_HIDDEN_LINKONCE)
5503 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5505 TREE_PUBLIC (decl) = 1;
5506 TREE_STATIC (decl) = 1;
5507 DECL_ONE_ONLY (decl) = 1;
5509 (*targetm.asm_out.unique_section) (decl, 0);
5510 switch_to_section (get_named_section (decl, NULL, 0));
5512 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5513 fputs ("\t.hidden\t", asm_out_file);
5514 assemble_name (asm_out_file, name);
5515 fputc ('\n', asm_out_file);
5516 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5520 switch_to_section (text_section);
5521 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address from the stack and return.  */
5524 xops[0] = gen_rtx_REG (SImode, regno);
5525 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5526 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5527 output_asm_insn ("ret", xops);
5530 if (NEED_INDICATE_EXEC_STACK)
5531 file_end_indicate_exec_stack ();
5534 /* Emit code for the SET_GOT patterns. */
/* Emits the assembly that loads the GOT/PIC base into DEST: VxWorks RTP
   loads it from GOTT_BASE/GOTT_INDEX; otherwise either the classic
   call/pop sequence or a call to the per-register pc thunk, followed by
   the add of _GLOBAL_OFFSET_TABLE_.  NOTE(review): some lines are
   elided in this extract.  */
5537 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5543 if (TARGET_VXWORKS_RTP && flag_pic)
5545 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5546 xops[2] = gen_rtx_MEM (Pmode,
5547 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5548 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5550 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5551 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5552 an unadorned address. */
5553 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5554 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5555 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5559 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or non-PIC), use the inline
   call-to-next-insn / pop idiom instead of a thunk.  */
5561 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5563 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5566 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5568 output_asm_insn ("call\t%a2", xops);
5571 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5572 is what will be referenced by the Mach-O PIC subsystem. */
5574 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5577 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5578 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5581 output_asm_insn ("pop{l}\t%0", xops);
/* Thunk path: record the register so ix86_file_end emits the thunk.  */
5586 get_pc_thunk_name (name, REGNO (dest));
5587 pic_labels_used |= 1 << REGNO (dest);
5589 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5590 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5591 output_asm_insn ("call\t%X2", xops);
5592 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5593 is what will be referenced by the Mach-O PIC subsystem. */
5596 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5598 targetm.asm_out.internal_label (asm_out_file, "L",
5599 CODE_LABEL_NUMBER (label));
5606 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5607 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5609 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5614 /* Generate an "push" pattern for input ARG. */
/* Builds a SET whose destination involves a PRE_DEC of the stack
   pointer — i.e. the RTL form of a push.  NOTE(review): the function
   signature and the MEM wrapper are elided from this view; presumably
   the result is (set (mem (pre_dec sp)) arg) — confirm in full file.  */
5619 return gen_rtx_SET (VOIDmode,
5621 gen_rtx_PRE_DEC (Pmode,
5622 stack_pointer_rtx)),
5626 /* Return >= 0 if there is an unused call-clobbered register available
5627 for the entire function. */
/* Only leaf, non-profiled functions that make no TLS-descriptor calls
   can repurpose a call-clobbered register; scan regs 2..0 (highest
   first) for one that is never live.  Returns INVALID_REGNUM when no
   such register exists.  */
5630 ix86_select_alt_pic_regnum (void)
5632 if (current_function_is_leaf && !current_function_profile
5633 && !ix86_current_function_calls_tls_descriptor)
5636 for (i = 2; i >= 0; --i)
5637 if (!df_regs_ever_live_p (i))
5641 return INVALID_REGNUM;
5644 /* Return 1 if we need to save REGNO. */
/* Decide whether REGNO must be saved in the prologue.  MAYBE_EH_RETURN
   additionally treats the EH return data registers as saved.  */
5646 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be preserved whenever it is actually used —
   unless an alternate unused call-clobbered register can hold the PIC
   base instead (see ix86_select_alt_pic_regnum).  */
5648 if (pic_offset_table_rtx
5649 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5650 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5651 || current_function_profile
5652 || current_function_calls_eh_return
5653 || current_function_uses_const_pool))
5655 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* For eh_return, the EH data registers are restored by the unwinder
   and so count as saved on that path.  */
5660 if (current_function_calls_eh_return && maybe_eh_return)
5665 unsigned test = EH_RETURN_DATA_REGNO (i);
5666 if (test == INVALID_REGNUM)
/* The register holding the realigned argument pointer is live across
   the whole body and must be preserved.  */
5673 if (cfun->machine->force_align_arg_pointer
5674 && regno == REGNO (cfun->machine->force_align_arg_pointer))
/* General case: save any live, call-saved, non-fixed register; skip
   the hard frame pointer when it is saved separately by the prologue.  */
5677 return (df_regs_ever_live_p (regno)
5678 && !call_used_regs[regno]
5679 && !fixed_regs[regno]
5680 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5683 /* Return number of registers to be saved on the stack. */
/* Counts hard registers for which ix86_save_reg (regno, true) holds,
   scanning from the highest hard register downwards.  */
5686 ix86_nsaved_regs (void)
5691 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5692 if (ix86_save_reg (regno, true))
5697 /* Return the offset between two registers, one to be eliminated, and the other
5698 its replacement, at the start of a routine. */
/* Implements INITIAL_ELIMINATION_OFFSET: computes the frame layout
   afresh and returns the distance between FROM (arg or frame pointer)
   and TO (hard frame pointer or stack pointer).  Only these pairings
   are valid; anything else trips the gcc_asserts below.  */
5701 ix86_initial_elimination_offset (int from, int to)
5703 struct ix86_frame frame;
5704 ix86_compute_frame_layout (&frame);
5706 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5707 return frame.hard_frame_pointer_offset;
5708 else if (from == FRAME_POINTER_REGNUM
5709 && to == HARD_FRAME_POINTER_REGNUM)
5710 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* All remaining eliminations must target the stack pointer.  */
5713 gcc_assert (to == STACK_POINTER_REGNUM);
5715 if (from == ARG_POINTER_REGNUM)
5716 return frame.stack_pointer_offset;
5718 gcc_assert (from == FRAME_POINTER_REGNUM);
5719 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5723 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes every field of *FRAME for the current function: number of
   saved registers, paddings, va_arg area, outgoing-args area, red-zone
   size, and the offsets of the frame/hard-frame/stack pointers.  Also
   chooses between push-based and mov-based register saving.
   NOTE(review): several conditionals and else-branches are elided in
   this view; comments describe only the visible statements.  */
5726 ix86_compute_frame_layout (struct ix86_frame *frame)
5728 HOST_WIDE_INT total_size;
5729 unsigned int stack_alignment_needed;
5730 HOST_WIDE_INT offset;
5731 unsigned int preferred_alignment;
5732 HOST_WIDE_INT size = get_frame_size ();
5734 frame->nregs = ix86_nsaved_regs ();
/* Convert the per-function alignment requirements from bits to bytes.  */
5737 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5738 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5740 /* During reload iteration the amount of registers saved can change.
5741 Recompute the value as needed. Do not recompute when amount of registers
5742 didn't change as reload does multiple calls to the function and does not
5743 expect the decision to change within single iteration. */
5745 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5747 int count = frame->nregs;
5749 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5750 /* The fast prologue uses move instead of push to save registers. This
5751 is significantly longer, but also executes faster as modern hardware
5752 can execute the moves in parallel, but can't do that for push/pop.
5754 Be careful about choosing what prologue to emit: When function takes
5755 many instructions to execute we may use slow version as well as in
5756 case function is known to be outside hot spot (this is known with
5757 feedback only). Weight the size of function by number of registers
5758 to save as it is cheap to use one or two push instructions but very
5759 slow to use many of them. */
5761 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5762 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5763 || (flag_branch_probabilities
5764 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5765 cfun->machine->use_fast_prologue_epilogue = false;
5767 cfun->machine->use_fast_prologue_epilogue
5768 = !expensive_function_p (count);
5770 if (TARGET_PROLOGUE_USING_MOVE
5771 && cfun->machine->use_fast_prologue_epilogue)
5772 frame->save_regs_using_mov = true;
5774 frame->save_regs_using_mov = false;
5777 /* Skip return address and saved base pointer. */
5778 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5780 frame->hard_frame_pointer_offset = offset;
5782 /* Do some sanity checking of stack_alignment_needed and
5783 preferred_alignment, since i386 port is the only using those features
5784 that may break easily. */
5786 gcc_assert (!size || stack_alignment_needed);
5787 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5788 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5789 gcc_assert (stack_alignment_needed
5790 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5792 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5793 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5795 /* Register save area */
5796 offset += frame->nregs * UNITS_PER_WORD;
/* Reserve space for registers spilled for (x86-64) varargs, if any.  */
5799 if (ix86_save_varrargs_registers)
5801 offset += X86_64_VARARGS_SIZE;
5802 frame->va_arg_size = X86_64_VARARGS_SIZE;
5805 frame->va_arg_size = 0;
5807 /* Align start of frame for local function. */
5808 frame->padding1 = ((offset + stack_alignment_needed - 1)
5809 & -stack_alignment_needed) - offset;
5811 offset += frame->padding1;
5813 /* Frame pointer points here. */
5814 frame->frame_pointer_offset = offset;
5818 /* Add outgoing arguments area. Can be skipped if we eliminated
5819 all the function calls as dead code.
5820 Skipping is however impossible when function calls alloca. Alloca
5821 expander assumes that last current_function_outgoing_args_size
5822 of stack frame are unused. */
5823 if (ACCUMULATE_OUTGOING_ARGS
5824 && (!current_function_is_leaf || current_function_calls_alloca
5825 || ix86_current_function_calls_tls_descriptor))
5827 offset += current_function_outgoing_args_size;
5828 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5831 frame->outgoing_arguments_size = 0;
5833 /* Align stack boundary. Only needed if we're calling another function
5835 if (!current_function_is_leaf || current_function_calls_alloca
5836 || ix86_current_function_calls_tls_descriptor)
5837 frame->padding2 = ((offset + preferred_alignment - 1)
5838 & -preferred_alignment) - offset;
5840 frame->padding2 = 0;
5842 offset += frame->padding2;
5844 /* We've reached end of stack frame. */
5845 frame->stack_pointer_offset = offset;
5847 /* Size prologue needs to allocate. */
5848 frame->to_allocate =
5849 (size + frame->padding1 + frame->padding2
5850 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny or huge allocations disable the mov-based save: huge ones
   because the x86-64 displacement must fit in 32 bits.  */
5852 if ((!frame->to_allocate && frame->nregs <= 1)
5853 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5854 frame->save_regs_using_mov = false;
/* Leaf functions on red-zone targets may place (part of) the frame in
   the red zone below the stack pointer, capped at the usable size.  */
5856 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5857 && current_function_is_leaf
5858 && !ix86_current_function_calls_tls_descriptor)
5860 frame->red_zone_size = frame->to_allocate;
5861 if (frame->save_regs_using_mov)
5862 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5863 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5864 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5867 frame->red_zone_size = 0;
5868 frame->to_allocate -= frame->red_zone_size;
5869 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably guarded by a debug
   flag in an elided conditional — confirm in the full file).  */
5871 fprintf (stderr, "\n");
5872 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5873 fprintf (stderr, "size: %ld\n", (long)size);
5874 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5875 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5876 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5877 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5878 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5879 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5880 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5881 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5882 (long)frame->hard_frame_pointer_offset);
5883 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5884 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5885 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5886 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5890 /* Emit code to save registers in the prologue. */
/* Pushes every register that ix86_save_reg reports as needing a save,
   from the highest hard register downwards, marking each push as
   frame-related for unwind info.  */
5893 ix86_emit_save_regs (void)
5898 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5899 if (ix86_save_reg (regno, true))
5901 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5902 RTX_FRAME_RELATED_P (insn) = 1;
5906 /* Emit code to save registers using MOV insns. First register
5907 is restored from POINTER + OFFSET. */
/* Stores each to-be-saved register at successive word-sized slots
   starting at POINTER + OFFSET, in ascending register order; each
   store is marked frame-related for the unwinder.  */
5909 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5914 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5915 if (ix86_save_reg (regno, true))
5917 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5919 gen_rtx_REG (Pmode, regno));
5920 RTX_FRAME_RELATED_P (insn) = 1;
5921 offset += UNITS_PER_WORD;
5925 /* Expand prologue or epilogue stack adjustment.
5926 The pattern exist to put a dependency on all ebp-based memory accesses.
5927 STYLE should be negative if instructions should be marked as frame related,
5928 zero if %r11 register is live and cannot be freely used and positive
/* Adjusts DEST = SRC + OFFSET via the dedicated adjust-stack patterns.
   On x86-64, offsets too big for a 32-bit immediate are first loaded
   into %r11 (which is why STYLE == 0 — r11 live — matters).
   NOTE(review): the surrounding if/else structure is partly elided in
   this view.  */
5932 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5937 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5938 else if (x86_64_immediate_operand (offset, DImode))
5939 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5943 /* r11 is used by indirect sibcall return as well, set before the
5944 epilogue and used after the epilogue. ATM indirect sibcall
5945 shouldn't be used together with huge frame sizes in one
5946 function because of the frame_size check in sibcall.c. */
5948 r11 = gen_rtx_REG (DImode, R11_REG);
5949 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5951 RTX_FRAME_RELATED_P (insn) = 1;
5952 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5956 RTX_FRAME_RELATED_P (insn) = 1;
5959 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* Returns the rtx to use as the incoming-argument pointer.  When stack
   realignment is requested (for main, via -mstackrealign, or via the
   force_align_arg_pointer attribute) the argument pointer is copied
   into a dedicated register (%ecx, hard reg 2) so the prologue may
   realign %esp; otherwise the virtual incoming-args rtx is used.  */
5962 ix86_internal_arg_pointer (void)
5964 bool has_force_align_arg_pointer =
5965 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5966 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5967 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5968 && DECL_NAME (current_function_decl)
5969 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5970 && DECL_FILE_SCOPE_P (current_function_decl))
5971 || ix86_force_align_arg_pointer
5972 || has_force_align_arg_pointer)
5974 /* Nested functions can't realign the stack due to a register
/* A nested function's static chain conflicts with the realignment
   register, so diagnose and fall back to the normal arg pointer.  */
5976 if (DECL_CONTEXT (current_function_decl)
5977 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5979 if (ix86_force_align_arg_pointer)
5980 warning (0, "-mstackrealign ignored for nested functions");
5981 if (has_force_align_arg_pointer)
5982 error ("%s not supported for nested functions",
5983 ix86_force_align_arg_pointer_string);
5984 return virtual_incoming_args_rtx;
5986 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5987 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5990 return virtual_incoming_args_rtx;
5993 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5994 This is called from dwarf2out.c to emit call frame instructions
5995 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
/* Dispatches on the UNSPEC code found in PATTERN's SET_SRC and emits
   the corresponding DWARF CFI: a register-save note or a CFA
   definition.  */
5997 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5999 rtx unspec = SET_SRC (pattern);
6000 gcc_assert (GET_CODE (unspec) == UNSPEC);
6004 case UNSPEC_REG_SAVE:
6005 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6006 SET_DEST (pattern));
6008 case UNSPEC_DEF_CFA:
6009 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6010 INTVAL (XVECEXP (unspec, 0, 0)));
6017 /* Expand the prologue into a bunch of separate insns. */
/* Emits the function prologue: optional stack realignment, frame
   pointer setup, register saves (push- or mov-based), stack
   allocation (direct, or via the stack-probe worker), PIC register
   setup, and a scheduling barrier for profiled functions.
   NOTE(review): several conditionals are elided in this view; comments
   describe only the visible statements.  */
6020 ix86_expand_prologue (void)
6024 struct ix86_frame frame;
6025 HOST_WIDE_INT allocate;
6027 ix86_compute_frame_layout (&frame);
/* Stack realignment: capture the incoming argument pointer, align
   %esp, then re-push the return address so unwind offsets stay
   consistent.  */
6029 if (cfun->machine->force_align_arg_pointer)
6033 /* Grab the argument pointer. */
6034 x = plus_constant (stack_pointer_rtx, 4);
6035 y = cfun->machine->force_align_arg_pointer;
6036 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6037 RTX_FRAME_RELATED_P (insn) = 1;
6039 /* The unwind info consists of two parts: install the fafp as the cfa,
6040 and record the fafp as the "save register" of the stack pointer.
6041 The later is there in order that the unwinder can see where it
6042 should restore the stack pointer across the and insn. */
6043 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6044 x = gen_rtx_SET (VOIDmode, y, x);
6045 RTX_FRAME_RELATED_P (x) = 1;
6046 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6048 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6049 RTX_FRAME_RELATED_P (y) = 1;
6050 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6051 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6052 REG_NOTES (insn) = x;
6054 /* Align the stack. */
6055 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6058 /* And here we cheat like madmen with the unwind info. We force the
6059 cfa register back to sp+4, which is exactly what it was at the
6060 start of the function. Re-pushing the return address results in
6061 the return at the same spot relative to the cfa, and thus is
6062 correct wrt the unwind info. */
6063 x = cfun->machine->force_align_arg_pointer;
6064 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6065 insn = emit_insn (gen_push (x));
6066 RTX_FRAME_RELATED_P (insn) = 1;
6069 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6070 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6071 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6072 REG_NOTES (insn) = x;
6075 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6076 slower on all targets. Also sdb doesn't like it. */
/* Standard frame-pointer setup: push %ebp; mov %esp, %ebp.  */
6078 if (frame_pointer_needed)
6080 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6081 RTX_FRAME_RELATED_P (insn) = 1;
6083 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6084 RTX_FRAME_RELATED_P (insn) = 1;
6087 allocate = frame.to_allocate;
6089 if (!frame.save_regs_using_mov)
6090 ix86_emit_save_regs ();
/* When saving with moves, the save slots are part of the allocation.  */
6092 allocate += frame.nregs * UNITS_PER_WORD;
6094 /* When using red zone we may start register saving before allocating
6095 the stack frame saving one cycle of the prologue. */
6096 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6097 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6098 : stack_pointer_rtx,
6099 -frame.nregs * UNITS_PER_WORD);
/* Small allocations (or no stack probing) adjust %esp directly...  */
6103 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6104 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6105 GEN_INT (-allocate), -1);
/* ...large probed allocations go through the allocate_stack worker,
   which takes the size in %eax; preserve a live incoming %eax.  */
6108 /* Only valid for Win32. */
6109 rtx eax = gen_rtx_REG (Pmode, 0);
6113 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6115 if (TARGET_64BIT_MS_ABI)
6118 eax_live = ix86_eax_live_at_start_p ();
6122 emit_insn (gen_push (eax));
6123 allocate -= UNITS_PER_WORD;
6126 emit_move_insn (eax, GEN_INT (allocate));
6129 insn = gen_allocate_stack_worker_64 (eax);
6131 insn = gen_allocate_stack_worker_32 (eax);
6132 insn = emit_insn (insn);
6133 RTX_FRAME_RELATED_P (insn) = 1;
/* Attach the equivalent sp = sp - allocate for the unwinder.  */
6134 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6135 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6136 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6137 t, REG_NOTES (insn));
/* Restore the caller's %eax from its temporary save slot.  */
6141 if (frame_pointer_needed)
6142 t = plus_constant (hard_frame_pointer_rtx,
6145 - frame.nregs * UNITS_PER_WORD);
6147 t = plus_constant (stack_pointer_rtx, allocate);
6148 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Non-red-zone mov-based saves happen after the allocation.  */
6152 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6154 if (!frame_pointer_needed || !frame.to_allocate)
6155 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6157 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6158 -frame.nregs * UNITS_PER_WORD);
/* Set up the PIC register if the function needs it.  */
6161 pic_reg_used = false;
6162 if (pic_offset_table_rtx
6163 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6164 || current_function_profile))
6166 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6168 if (alt_pic_reg_used != INVALID_REGNUM)
6169 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6171 pic_reg_used = true;
/* Large PIC model (x86-64): build the GOT address from %rip and a
   64-bit offset computed in a scratch register (%r11).  */
6178 if (ix86_cmodel == CM_LARGE_PIC)
6180 rtx tmp_reg = gen_rtx_REG (DImode,
6181 FIRST_REX_INT_REG + 3 /* R11 */);
6182 rtx label = gen_label_rtx ();
6184 LABEL_PRESERVE_P (label) = 1;
6185 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6186 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6187 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6188 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6189 pic_offset_table_rtx, tmp_reg));
6192 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6195 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6198 /* Prevent function calls from be scheduled before the call to mcount.
6199 In the pic_reg_used case, make sure that the got load isn't deleted. */
6200 if (current_function_profile)
6203 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6204 emit_insn (gen_blockage ());
6208 /* Emit code to restore saved registers using MOV insns. First register
6209 is restored from POINTER + OFFSET. */
/* Loads each saved register back from successive word slots at
   POINTER + OFFSET, in ascending register order.  MAYBE_EH_RETURN is
   forwarded to ix86_save_reg so EH data registers are included on the
   eh_return path.  */
6211 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6212 int maybe_eh_return)
6215 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6217 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6218 if (ix86_save_reg (regno, maybe_eh_return))
6220 /* Ensure that adjust_address won't be forced to produce pointer
6221 out of range allowed by x86-64 instruction set. */
/* Offsets that do not fit a signed 32-bit displacement are folded
   into %r11 and the remaining offset restarts from zero.  */
6222 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6226 r11 = gen_rtx_REG (DImode, R11_REG);
6227 emit_move_insn (r11, GEN_INT (offset));
6228 emit_insn (gen_adddi3 (r11, r11, pointer));
6229 base_address = gen_rtx_MEM (Pmode, r11);
6232 emit_move_insn (gen_rtx_REG (Pmode, regno),
6233 adjust_address (base_address, Pmode, offset));
6234 offset += UNITS_PER_WORD;
6238 /* Restore function stack, frame, and registers. */
/* Emits the epilogue.  STYLE distinguishes the normal return, sibcall
   (no return insn wanted) and eh_return (style == 2) variants.  Two
   overall strategies: restore registers with moves and then fix %esp
   in one step (often ending in leave), or deallocate first and pop
   the registers.  NOTE(review): some conditionals are elided in this
   view; comments describe only the visible statements.  */
6241 ix86_expand_epilogue (int style)
6244 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6245 struct ix86_frame frame;
6246 HOST_WIDE_INT offset;
6248 ix86_compute_frame_layout (&frame);
6250 /* Calculate start of saved registers relative to ebp. Special care
6251 must be taken for the normal return case of a function using
6252 eh_return: the eax and edx registers are marked as saved, but not
6253 restored along this path. */
6254 offset = frame.nregs;
6255 if (current_function_calls_eh_return && style != 2)
6257 offset *= -UNITS_PER_WORD;
6259 /* If we're only restoring one register and sp is not valid then
6260 using a move instruction to restore the register since it's
6261 less work than reloading sp and popping the register.
6263 The default code result in stack adjustment using add/lea instruction,
6264 while this code results in LEAVE instruction (or discrete equivalent),
6265 so it is profitable in some other cases as well. Especially when there
6266 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6267 and there is exactly one register to pop. This heuristic may need some
6268 tuning in future. */
6269 if ((!sp_valid && frame.nregs <= 1)
6270 || (TARGET_EPILOGUE_USING_MOVE
6271 && cfun->machine->use_fast_prologue_epilogue
6272 && (frame.nregs > 1 || frame.to_allocate))
6273 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6274 || (frame_pointer_needed && TARGET_USE_LEAVE
6275 && cfun->machine->use_fast_prologue_epilogue
6276 && frame.nregs == 1)
6277 || current_function_calls_eh_return)
6279 /* Restore registers. We can use ebp or esp to address the memory
6280 locations. If both are available, default to ebp, since offsets
6281 are known to be small. Only exception is esp pointing directly to the
6282 end of block of saved registers, where we may simplify addressing
6285 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6286 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6287 frame.to_allocate, style == 2);
6289 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6290 offset, style == 2);
6292 /* eh_return epilogues need %ecx added to the stack pointer. */
6295 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
/* With a frame pointer, compute the adjusted sp from %ebp, reload
   %ebp from memory, then move sp past the handler adjustment.  */
6297 if (frame_pointer_needed)
6299 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6300 tmp = plus_constant (tmp, UNITS_PER_WORD);
6301 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6303 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6304 emit_move_insn (hard_frame_pointer_rtx, tmp);
6306 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer, add the allocation and save area to sp.  */
6311 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6312 tmp = plus_constant (tmp, (frame.to_allocate
6313 + frame.nregs * UNITS_PER_WORD));
6314 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6317 else if (!frame_pointer_needed)
6318 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6319 GEN_INT (frame.to_allocate
6320 + frame.nregs * UNITS_PER_WORD),
6322 /* If not an i386, mov & pop is faster than "leave". */
6323 else if (TARGET_USE_LEAVE || optimize_size
6324 || !cfun->machine->use_fast_prologue_epilogue)
6325 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6328 pro_epilogue_adjust_stack (stack_pointer_rtx,
6329 hard_frame_pointer_rtx,
6332 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6334 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6339 /* First step is to deallocate the stack frame so that we can
6340 pop the registers. */
6343 gcc_assert (frame_pointer_needed);
6344 pro_epilogue_adjust_stack (stack_pointer_rtx,
6345 hard_frame_pointer_rtx,
6346 GEN_INT (offset), style);
6348 else if (frame.to_allocate)
6349 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6350 GEN_INT (frame.to_allocate), style);
/* Pop the saved registers in ascending order (reverse of the pushes).  */
6352 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6353 if (ix86_save_reg (regno, false))
6356 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6358 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6360 if (frame_pointer_needed)
6362 /* Leave results in shorter dependency chains on CPUs that are
6363 able to grok it fast. */
6364 if (TARGET_USE_LEAVE)
6365 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6366 else if (TARGET_64BIT)
6367 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6369 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Undo the prologue's stack realignment by restoring sp from the
   saved argument pointer.  */
6373 if (cfun->machine->force_align_arg_pointer)
6375 emit_insn (gen_addsi3 (stack_pointer_rtx,
6376 cfun->machine->force_align_arg_pointer,
6380 /* Sibcall epilogues don't want a return instruction. */
/* Emit the return, honoring callee-pop conventions (stdcall/pascal).  */
6384 if (current_function_pops_args && current_function_args_size)
6386 rtx popc = GEN_INT (current_function_pops_args);
6388 /* i386 can only pop 64K bytes. If asked to pop more, pop
6389 return address, do explicit add, and jump indirectly to the
6392 if (current_function_pops_args >= 65536)
6394 rtx ecx = gen_rtx_REG (SImode, 2);
6396 /* There is no "pascal" calling convention in any 64bit ABI. */
6397 gcc_assert (!TARGET_64BIT);
6399 emit_insn (gen_popsi1 (ecx));
6400 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6401 emit_jump_insn (gen_return_indirect_internal (ecx));
6404 emit_jump_insn (gen_return_pop_internal (popc));
6407 emit_jump_insn (gen_return_internal ());
6410 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: undo the per-function renaming of
   the PIC register (an alternate regno may have been chosen in the
   prologue), and on Mach-O emit a trailing nop when the function ends
   in a label, since Mach-O cannot place labels at the end of objects.  */
6413 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6414 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6416 if (pic_offset_table_rtx)
6417 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6419 /* Mach-O doesn't support labels at the end of objects, so if
6420 it looks like we might want one, insert a NOP. */
6422 rtx insn = get_last_insn ();
6425 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6426 insn = PREV_INSN (insn);
6430 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6431 fputs ("\tnop\n", file);
6437 /* Extract the parts of an RTL expression that is a valid memory address
6438 for an instruction. Return 0 if the structure of the address is
6439 grossly off. Return -1 if the address contains ASHIFT, so it is not
6440 strictly valid, but still used for computing length of lea instruction. */
/* Splits ADDR into OUT->{base, index, scale, disp, seg}, handling the
   PLUS/MULT/ASHIFT forms and the UNSPEC_TP thread-pointer segment
   override, then applies the encoding special cases listed below.
   NOTE(review): several branches are elided in this view.  */
6443 ix86_decompose_address (rtx addr, struct ix86_address *out)
6445 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6446 rtx base_reg, index_reg;
6447 HOST_WIDE_INT scale = 1;
6448 rtx scale_rtx = NULL_RTX;
6450 enum ix86_address_seg seg = SEG_DEFAULT;
6452 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
/* A PLUS tree is flattened into an addends[] worklist first...  */
6454 else if (GET_CODE (addr) == PLUS)
6464 addends[n++] = XEXP (op, 1);
6467 while (GET_CODE (op) == PLUS);
/* ...then each addend is classified by its RTL code.  */
6472 for (i = n; i >= 0; --i)
6475 switch (GET_CODE (op))
6480 index = XEXP (op, 0);
6481 scale_rtx = XEXP (op, 1);
/* UNSPEC_TP denotes the TLS thread pointer; map it to %fs/%gs.  */
6485 if (XINT (op, 1) == UNSPEC_TP
6486 && TARGET_TLS_DIRECT_SEG_REFS
6487 && seg == SEG_DEFAULT)
6488 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6517 else if (GET_CODE (addr) == MULT)
6519 index = XEXP (addr, 0); /* index*scale */
6520 scale_rtx = XEXP (addr, 1);
6522 else if (GET_CODE (addr) == ASHIFT)
6526 /* We're called for lea too, which implements ashift on occasion. */
6527 index = XEXP (addr, 0);
6528 tmp = XEXP (addr, 1);
6529 if (!CONST_INT_P (tmp))
6531 scale = INTVAL (tmp);
/* Shift counts above 3 cannot be expressed as a 1/2/4/8 scale.  */
6532 if ((unsigned HOST_WIDE_INT) scale > 3)
6538 disp = addr; /* displacement */
6540 /* Extract the integral value of scale. */
6543 if (!CONST_INT_P (scale_rtx))
6545 scale = INTVAL (scale_rtx);
6548 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6549 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6551 /* Allow arg pointer and stack pointer as index if there is not scaling. */
/* %esp (and the soft pointers that become it) cannot be an index, so
   swap base and index when scale permits.  */
6552 if (base_reg && index_reg && scale == 1
6553 && (index_reg == arg_pointer_rtx
6554 || index_reg == frame_pointer_rtx
6555 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6558 tmp = base, base = index, index = tmp;
6559 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6562 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6563 if ((base_reg == hard_frame_pointer_rtx
6564 || base_reg == frame_pointer_rtx
6565 || base_reg == arg_pointer_rtx) && !disp)
6568 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6569 Avoid this by transforming to [%esi+0]. */
6570 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6571 && base_reg && !index_reg && !disp
6573 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6576 /* Special case: encode reg+reg instead of reg*2. */
6577 if (!base && index && scale && scale == 2)
6578 base = index, base_reg = index_reg, scale = 1;
6580 /* Special case: scaling cannot be encoded without base or displacement. */
6581 if (!base && !disp && index && scale != 1)
6593 /* Return cost of the memory address x.
6594 For i386, it is better to use a complex address than let gcc copy
6595 the address into a reg and make a new pseudo. But not if the address
6596 requires to two regs - that would mean more pseudos with longer
/* TARGET_ADDRESS_COST hook: decomposes X and charges for the number of
   hard registers involved, plus a K6-specific penalty for addressing
   modes its predecoder handles poorly.  NOTE(review): the cost
   accumulation statements between the conditions are elided in this
   view.  */
6599 ix86_address_cost (rtx x)
6601 struct ix86_address parts;
6603 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so register checks below see the underlying regs.  */
6607 if (parts.base && GET_CODE (parts.base) == SUBREG)
6608 parts.base = SUBREG_REG (parts.base);
6609 if (parts.index && GET_CODE (parts.index) == SUBREG)
6610 parts.index = SUBREG_REG (parts.index);
6612 /* Attempt to minimize number of registers in the address. */
6614 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6616 && (!REG_P (parts.index)
6617 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6621 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6623 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6624 && parts.base != parts.index)
6627 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6628 since it's predecode logic can't detect the length of instructions
6629 and it degenerates to vector decoded. Increase cost of such
6630 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6631 to split such addresses or even refuse such addresses at all.
6633 Following addressing modes are affected:
6638 The first and last case may be avoidable by explicitly coding the zero in
6639 memory address, but I don't have AMD-K6 machine handy to check this
6643 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6644 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6645 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6651 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6652 this is used for to form addresses to local data when -fPIC is in
/* Recognizes the Mach-O pattern (minus (label_ref|symbol_ref)
   (symbol_ref "<pic base>")), which addresses local data relative to
   the PIC base under -fPIC.  */
6656 darwin_local_data_pic (rtx disp)
6658 if (GET_CODE (disp) == MINUS)
6660 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6661 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6662 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6664 const char *sym_name = XSTR (XEXP (disp, 1), 0);
/* The magic name "<pic base>" identifies the PIC-base symbol.  */
6665 if (! strcmp (sym_name, "<pic base>"))
6673 /* Determine if a given RTX is a valid constant. We already know this
6674 satisfies CONSTANT_P. */
/* Walks CONST wrappers down to the underlying symbol/label and rejects
   TLS symbols, DLLIMPORT symbols, and unsupported UNSPEC forms; other
   constants are accepted and handled by the move patterns.
   NOTE(review): some case labels and returns are elided in this view.  */
6677 legitimate_constant_p (rtx x)
6679 switch (GET_CODE (x))
/* (const (plus sym cst)) — the addend must be a CONST_INT.  */
6684 if (GET_CODE (x) == PLUS)
6686 if (!CONST_INT_P (XEXP (x, 1)))
6691 if (TARGET_MACHO && darwin_local_data_pic (x))
6694 /* Only some unspecs are valid as "constants". */
6695 if (GET_CODE (x) == UNSPEC)
6696 switch (XINT (x, 1))
6701 return TARGET_64BIT;
/* These unspecs wrap a symbol; valid only for the matching TLS model.  */
6704 x = XVECEXP (x, 0, 0);
6705 return (GET_CODE (x) == SYMBOL_REF
6706 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6708 x = XVECEXP (x, 0, 0);
6709 return (GET_CODE (x) == SYMBOL_REF
6710 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6715 /* We must have drilled down to a symbol. */
6716 if (GET_CODE (x) == LABEL_REF)
6718 if (GET_CODE (x) != SYMBOL_REF)
6723 /* TLS symbols are never valid. */
6724 if (SYMBOL_REF_TLS_MODEL (x))
6727 /* DLLIMPORT symbols are never valid. */
6728 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6729 && SYMBOL_REF_DLLIMPORT_P (x))
/* TImode constants other than zero need special handling.  */
6734 if (GET_MODE (x) == TImode
6735 && x != CONST0_RTX (TImode)
6741 if (x == CONST0_RTX (GET_MODE (x)))
6749 /* Otherwise we handle everything else in the move patterns. */
6753 /* Determine if it's legal to put X into the constant pool. This
6754 is not possible for the address of thread-local symbols, which
6755 is checked above.  Used as TARGET_CANNOT_FORCE_CONST_MEM; returns
   true when X must NOT be forced into memory.  */
6758 ix86_cannot_force_const_mem (rtx x)
6760 /* We can always put integral constants and vectors in memory. */
6761 switch (GET_CODE (x))
   /* Everything else is pool-able exactly when it is a legitimate
      constant; reuse that predicate inverted.  */
6771 return !legitimate_constant_p (x);
6774 /* Determine if a given RTX is a valid constant address.
   True iff X is both a constant and a legitimate (strict) address
   in Pmode.  */
6777 constant_address_p (rtx x)
6779 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6782 /* Nonzero if the constant value X is a legitimate general operand
6783 when generating PIC code. It is given that flag_pic is on and
6784 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6787 legitimate_pic_operand_p (rtx x)
6791 switch (GET_CODE (x))
6794 inner = XEXP (x, 0);
   /* Strip an outer (plus ... const_int) wrapper before inspecting.  */
6795 if (GET_CODE (inner) == PLUS
6796 && CONST_INT_P (XEXP (inner, 1)))
6797 inner = XEXP (inner, 0);
6799 /* Only some unspecs are valid as "constants". */
6800 if (GET_CODE (inner) == UNSPEC)
6801 switch (XINT (inner, 1))
6806 return TARGET_64BIT;
   /* A TPOFF-style unspec is OK only for a local-exec TLS symbol.  */
6808 x = XVECEXP (inner, 0, 0);
6809 return (GET_CODE (x) == SYMBOL_REF
6810 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
   /* Symbolic operands fall through to the PIC displacement check.  */
6818 return legitimate_pic_address_disp_p (x);
6825 /* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  Accepts GOT/GOTOFF/PLTOFF unspecs and, on 64-bit,
   direct RIP-relative references to local symbols.  */
6829 legitimate_pic_address_disp_p (rtx disp)
6833 /* In 64bit mode we can allow direct addresses of symbols and labels
6834 when they are not dynamic symbols. */
6837 rtx op0 = disp, op1;
6839 switch (GET_CODE (disp))
6845 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6847 op0 = XEXP (XEXP (disp, 0), 0);
6848 op1 = XEXP (XEXP (disp, 0), 1);
   /* Offsets must stay within +/- 16MB so the RIP-relative form
      is guaranteed to reach (small code model headroom).  */
6849 if (!CONST_INT_P (op1)
6850 || INTVAL (op1) >= 16*1024*1024
6851 || INTVAL (op1) < -16*1024*1024)
6853 if (GET_CODE (op0) == LABEL_REF
6855 if (GET_CODE (op0) != SYMBOL_REF
6860 /* TLS references should always be enclosed in UNSPEC. */
6861 if (SYMBOL_REF_TLS_MODEL (op0))
6863 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6864 && ix86_cmodel != CM_LARGE_PIC)
6872 if (GET_CODE (disp) != CONST)
6874 disp = XEXP (disp, 0);
6878 /* It is unsafe to allow PLUS expressions.  This limited the allowed
6879 distance of GOT table references.  We should not need these anyway. */
6880 if (GET_CODE (disp) != UNSPEC
6881 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6882 && XINT (disp, 1) != UNSPEC_GOTOFF
6883 && XINT (disp, 1) != UNSPEC_PLTOFF))
6886 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6887 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
   /* 32-bit path: strip an optional CONST_INT addend first.  */
6893 if (GET_CODE (disp) == PLUS)
6895 if (!CONST_INT_P (XEXP (disp, 1)))
6897 disp = XEXP (disp, 0);
6901 if (TARGET_MACHO && darwin_local_data_pic (disp))
6904 if (GET_CODE (disp) != UNSPEC)
6907 switch (XINT (disp, 1))
6912 /* We need to check for both symbols and labels because VxWorks loads
6913 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6915 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6916 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6918 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6919 While the ABI also specifies a 32bit relocation, we don't produce it
6920 in the small PIC model at all. */
6921 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6922 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6924 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6926 case UNSPEC_GOTTPOFF:
6927 case UNSPEC_GOTNTPOFF:
6928 case UNSPEC_INDNTPOFF:
   /* Each TLS unspec is valid only for the matching TLS model of the
      wrapped symbol.  */
6931 disp = XVECEXP (disp, 0, 0);
6932 return (GET_CODE (disp) == SYMBOL_REF
6933 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6935 disp = XVECEXP (disp, 0, 0);
6936 return (GET_CODE (disp) == SYMBOL_REF
6937 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6939 disp = XVECEXP (disp, 0, 0);
6940 return (GET_CODE (disp) == SYMBOL_REF
6941 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6947 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6948 memory address for an instruction. The MODE argument is the machine mode
6949 for the MEM expression that wants to use this address.
6951 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6952 convert common non-canonical forms to canonical form so that they will
   be recognized.  STRICT selects between strict (reload-time) and
   non-strict register checks.  On rejection, REASON records why
   (presumably for the elided debug-dump path — confirm upstream).  */
6956 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6957 rtx addr, int strict)
6959 struct ix86_address parts;
6960 rtx base, index, disp;
6961 HOST_WIDE_INT scale;
6962 const char *reason = NULL;
6963 rtx reason_rtx = NULL_RTX;
   /* First split ADDR into base + index*scale + disp components.  */
6965 if (ix86_decompose_address (addr, &parts) <= 0)
6967 reason = "decomposition failed";
6972 index = parts.index;
6974 scale = parts.scale;
6976 /* Validate base register.
6978 Don't allow SUBREG's that span more than a word here. It can lead to spill
6979 failures when the base is one word out of a two word structure, which is
6980 represented internally as a DImode int. */
6989 else if (GET_CODE (base) == SUBREG
6990 && REG_P (SUBREG_REG (base))
6991 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6993 reg = SUBREG_REG (base);
6996 reason = "base is not a register";
7000 if (GET_MODE (base) != Pmode)
7002 reason = "base is not in Pmode";
7006 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7007 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7009 reason = "base is not valid";
7014 /* Validate index register.
7016 Don't allow SUBREG's that span more than a word here -- same as above. */
7025 else if (GET_CODE (index) == SUBREG
7026 && REG_P (SUBREG_REG (index))
7027 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7029 reg = SUBREG_REG (index);
7032 reason = "index is not a register";
7036 if (GET_MODE (index) != Pmode)
7038 reason = "index is not in Pmode";
7042 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7043 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7045 reason = "index is not valid";
7050 /* Validate scale factor. */
7053 reason_rtx = GEN_INT (scale);
7056 reason = "scale without index";
   /* x86 addressing only encodes scales 1, 2, 4 and 8.  */
7060 if (scale != 2 && scale != 4 && scale != 8)
7062 reason = "scale is not a valid multiplier";
7067 /* Validate displacement. */
7072 if (GET_CODE (disp) == CONST
7073 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7074 switch (XINT (XEXP (disp, 0), 1))
7076 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7077 used. While ABI specify also 32bit relocations, we don't produce
7078 them at all and use IP relative instead. */
7081 gcc_assert (flag_pic);
7083 goto is_legitimate_pic;
7084 reason = "64bit address unspec";
7087 case UNSPEC_GOTPCREL:
7088 gcc_assert (flag_pic);
7089 goto is_legitimate_pic;
7091 case UNSPEC_GOTTPOFF:
7092 case UNSPEC_GOTNTPOFF:
7093 case UNSPEC_INDNTPOFF:
7099 reason = "invalid address unspec";
7103 else if (SYMBOLIC_CONST (disp)
7107 && MACHOPIC_INDIRECT
7108 && !machopic_operand_p (disp)
   /* PIC path: on 64-bit, a register-relative form is only allowed
      for @DTPOFF/@NTPOFF TLS offsets.  */
7114 if (TARGET_64BIT && (index || base))
7116 /* foo@dtpoff(%rX) is ok. */
7117 if (GET_CODE (disp) != CONST
7118 || GET_CODE (XEXP (disp, 0)) != PLUS
7119 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7120 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7121 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7122 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7124 reason = "non-constant pic memory reference";
7128 else if (! legitimate_pic_address_disp_p (disp))
7130 reason = "displacement is an invalid pic construct";
7134 /* This code used to verify that a symbolic pic displacement
7135 includes the pic_offset_table_rtx register.
7137 While this is good idea, unfortunately these constructs may
7138 be created by "adds using lea" optimization for incorrect
7147 This code is nonsensical, but results in addressing
7148 GOT table with pic_offset_table_rtx base. We can't
7149 just refuse it easily, since it gets matched by
7150 "addsi3" pattern, that later gets split to lea in the
7151 case output register differs from input. While this
7152 can be handled by separate addsi pattern for this case
7153 that never results in lea, this seems to be easier and
7154 correct fix for crash to disable this test. */
7156 else if (GET_CODE (disp) != LABEL_REF
7157 && !CONST_INT_P (disp)
7158 && (GET_CODE (disp) != CONST
7159 || !legitimate_constant_p (disp))
7160 && (GET_CODE (disp) != SYMBOL_REF
7161 || !legitimate_constant_p (disp)))
7163 reason = "displacement is not constant";
7166 else if (TARGET_64BIT
7167 && !x86_64_immediate_operand (disp, VOIDmode))
7169 reason = "displacement is out of range";
7174 /* Everything looks valid. */
7181 /* Return a unique alias set for the GOT.  Lazily created on first
   use and cached in a function-local static thereafter.  */
7183 static alias_set_type
7184 ix86_GOT_alias_set (void)
7186 static alias_set_type set = -1;
   /* -1 marks "not yet allocated".  */
7188 set = new_alias_set ();
7192 /* Return a legitimate reference for ORIG (an address) using the
7193 register REG. If REG is 0, a new pseudo is generated.
7195 There are two types of references that must be handled:
7197 1. Global data references must load the address from the GOT, via
7198 the PIC reg. An insn is emitted to do this load, and the reg is
7201 2. Static data references, constant pool addresses, and code labels
7202 compute the address as an offset from the GOT, whose base is in
7203 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7204 differentiate them from global data objects. The returned
7205 address is the PIC reg + an unspec constant.
7207 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7208 reg also appears in the address. */
7211 legitimize_pic_address (rtx orig, rtx reg)
7218 if (TARGET_MACHO && !TARGET_64BIT)
7221 reg = gen_reg_rtx (Pmode);
7222 /* Use the generic Mach-O PIC machinery. */
7223 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
   /* Already a valid 64-bit PIC displacement: nothing to do.  */
7227 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7229 else if (TARGET_64BIT
7230 && ix86_cmodel != CM_SMALL_PIC
7231 && gotoff_operand (addr, Pmode))
7234 /* This symbol may be referenced via a displacement from the PIC
7235 base address (@GOTOFF). */
7237 if (reload_in_progress)
7238 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7239 if (GET_CODE (addr) == CONST)
7240 addr = XEXP (addr, 0);
7241 if (GET_CODE (addr) == PLUS)
   /* Wrap only the symbol part in the GOTOFF unspec; keep the
      CONST_INT addend outside.  */
7243 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7245 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7248 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7249 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7251 tmpreg = gen_reg_rtx (Pmode);
7254 emit_move_insn (tmpreg, new_rtx);
7258 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7259 tmpreg, 1, OPTAB_DIRECT);
7262 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7264 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7266 /* This symbol may be referenced via a displacement from the PIC
7267 base address (@GOTOFF). */
7269 if (reload_in_progress)
7270 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7271 if (GET_CODE (addr) == CONST)
7272 addr = XEXP (addr, 0);
7273 if (GET_CODE (addr) == PLUS)
7275 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7277 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7280 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7281 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7282 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7286 emit_move_insn (reg, new_rtx);
7290 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7291 /* We can't use @GOTOFF for text labels on VxWorks;
7292 see gotoff_operand. */
7293 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)
7295 /* Given that we've already handled dllimport variables separately
7296 in legitimize_address, and all other variables should satisfy
7297 legitimate_pic_address_disp_p, we should never arrive here. */
7298 gcc_assert (!TARGET_64BIT_MS_ABI);
7300 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
   /* 64-bit small PIC: load through a RIP-relative @GOTPCREL slot.  */
7302 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7303 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7304 new_rtx = gen_const_mem (Pmode, new_rtx);
7305 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7308 reg = gen_reg_rtx (Pmode);
7309 /* Use directly gen_movsi, otherwise the address is loaded
7310 into register for CSE. We don't want to CSE this addresses,
7311 instead we CSE addresses from the GOT table, so skip this. */
7312 emit_insn (gen_movsi (reg, new_rtx));
7317 /* This symbol must be referenced via a load from the
7318 Global Offset Table (@GOT). */
7320 if (reload_in_progress)
7321 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7322 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7323 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7325 new_rtx = force_reg (Pmode, new_rtx);
7326 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7327 new_rtx = gen_const_mem (Pmode, new_rtx);
7328 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7331 reg = gen_reg_rtx (Pmode);
7332 emit_move_insn (reg, new_rtx);
   /* Remaining cases: plain constants and composite expressions.  */
7338 if (CONST_INT_P (addr)
7339 && !x86_64_immediate_operand (addr, VOIDmode))
7343 emit_move_insn (reg, addr);
7347 new_rtx = force_reg (Pmode, addr);
7349 else if (GET_CODE (addr) == CONST)
7351 addr = XEXP (addr, 0);
7353 /* We must match stuff we generate before. Assume the only
7354 unspecs that can get here are ours. Not that we could do
7355 anything with them anyway.... */
7356 if (GET_CODE (addr) == UNSPEC
7357 || (GET_CODE (addr) == PLUS
7358 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7360 gcc_assert (GET_CODE (addr) == PLUS);
7362 if (GET_CODE (addr) == PLUS)
7364 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7366 /* Check first to see if this is a constant offset from a @GOTOFF
7367 symbol reference. */
7368 if (gotoff_operand (op0, Pmode)
7369 && CONST_INT_P (op1))
7373 if (reload_in_progress)
7374 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7375 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7377 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7378 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7379 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7383 emit_move_insn (reg, new_rtx);
   /* Large offsets cannot be folded into the relocation; force the
      pieces into registers instead.  */
7389 if (INTVAL (op1) < -16*1024*1024
7390 || INTVAL (op1) >= 16*1024*1024)
7392 if (!x86_64_immediate_operand (op1, Pmode))
7393 op1 = force_reg (Pmode, op1);
7394 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
   /* Otherwise legitimize the two addends recursively and recombine.  */
7400 base = legitimize_pic_address (XEXP (addr, 0), reg);
7401 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7402 base == reg ? NULL_RTX : reg);
7404 if (CONST_INT_P (new_rtx))
7405 new_rtx = plus_constant (base, INTVAL (new_rtx));
7408 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7410 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7411 new_rtx = XEXP (new_rtx, 1);
7413 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7421 /* Load the thread pointer. If TO_REG is true, force it into a register.
   Returns either the raw UNSPEC_TP rtx or a fresh pseudo holding it.  */
7424 get_thread_pointer (int to_reg)
7428 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7432 reg = gen_reg_rtx (Pmode);
7433 insn = gen_rtx_SET (VOIDmode, reg, tp);
7434 insn = emit_insn (insn);
7439 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7440 false if we expect this to be used for a memory address and true if
7441 we expect to load the address into a register.  Expands a TLS
   reference X according to MODEL (GD/LD/IE/LE) and returns the
   legitimized address rtx.  */
7444 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7446 rtx dest, base, off, pic, tp;
7451 case TLS_MODEL_GLOBAL_DYNAMIC:
7452 dest = gen_reg_rtx (Pmode);
7453 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7455 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
   /* Classic 64-bit GD: call __tls_get_addr, result in %rax.  */
7457 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7460 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7461 insns = get_insns ();
7464 CONST_OR_PURE_CALL_P (insns) = 1;
7465 emit_libcall_block (insns, dest, rax, x);
7467 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7468 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7470 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7472 if (TARGET_GNU2_TLS)
   /* GNU2 descriptors yield an offset; add the thread pointer.  */
7474 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7476 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7480 case TLS_MODEL_LOCAL_DYNAMIC:
7481 base = gen_reg_rtx (Pmode);
7482 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7484 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7486 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7489 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7490 insns = get_insns ();
7493 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7494 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7495 CONST_OR_PURE_CALL_P (insns) = 1;
7496 emit_libcall_block (insns, base, rax, note);
7498 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7499 emit_insn (gen_tls_local_dynamic_base_64 (base));
7501 emit_insn (gen_tls_local_dynamic_base_32 (base));
7503 if (TARGET_GNU2_TLS)
7505 rtx x = ix86_tls_module_base ();
7507 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7508 gen_rtx_MINUS (Pmode, x, tp));
   /* Add the symbol's @DTPOFF offset to the module base.  */
7511 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7512 off = gen_rtx_CONST (Pmode, off);
7514 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7516 if (TARGET_GNU2_TLS)
7518 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7520 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7525 case TLS_MODEL_INITIAL_EXEC:
7529 type = UNSPEC_GOTNTPOFF;
7533 if (reload_in_progress)
7534 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7535 pic = pic_offset_table_rtx;
7536 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7538 else if (!TARGET_ANY_GNU_TLS)
7540 pic = gen_reg_rtx (Pmode);
7541 emit_insn (gen_set_got (pic));
7542 type = UNSPEC_GOTTPOFF;
7547 type = UNSPEC_INDNTPOFF;
   /* Load the TP offset from the GOT slot.  */
7550 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7551 off = gen_rtx_CONST (Pmode, off);
7553 off = gen_rtx_PLUS (Pmode, pic, off);
7554 off = gen_const_mem (Pmode, off);
7555 set_mem_alias_set (off, ix86_GOT_alias_set ());
7557 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7559 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7560 off = force_reg (Pmode, off);
7561 return gen_rtx_PLUS (Pmode, base, off);
   /* Non-GNU TLS: offsets are negative, so subtract from TP.  */
7565 base = get_thread_pointer (true);
7566 dest = gen_reg_rtx (Pmode);
7567 emit_insn (gen_subsi3 (dest, base, off));
7571 case TLS_MODEL_LOCAL_EXEC:
7572 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7573 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7574 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7575 off = gen_rtx_CONST (Pmode, off);
7577 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7579 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7580 return gen_rtx_PLUS (Pmode, base, off);
7584 base = get_thread_pointer (true);
7585 dest = gen_reg_rtx (Pmode);
7586 emit_insn (gen_subsi3 (dest, base, off));
7597 /* Create or return the unique __imp_DECL dllimport symbol corresponding
   to DECL.  Results are cached in DLLIMPORT_MAP so each decl gets
   exactly one import stub VAR_DECL.  */
7600 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7601 htab_t dllimport_map;
7604 get_dllimport_decl (tree decl)
7606 struct tree_map *h, in;
7610 size_t namelen, prefixlen;
7616 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7618 in.hash = htab_hash_pointer (decl);
7619 in.base.from = decl;
7620 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7621 h = (struct tree_map *) *loc;
   /* Cache miss: build the artificial pointer VAR_DECL for __imp_NAME.  */
7625 *loc = h = GGC_NEW (struct tree_map);
7627 h->base.from = decl;
7628 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7629 DECL_ARTIFICIAL (to) = 1;
7630 DECL_IGNORED_P (to) = 1;
7631 DECL_EXTERNAL (to) = 1;
7632 TREE_READONLY (to) = 1;
7634 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7635 name = targetm.strip_name_encoding (name);
7636 if (name[0] == FASTCALL_PREFIX)
7642 prefix = "*__imp__";
7644 namelen = strlen (name);
7645 prefixlen = strlen (prefix);
7646 imp_name = (char *) alloca (namelen + prefixlen + 1);
7647 memcpy (imp_name, prefix, prefixlen);
7648 memcpy (imp_name + prefixlen, name, namelen + 1);
   /* The import address is loaded through a const MEM in GOT alias set.  */
7650 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7651 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7652 SET_SYMBOL_REF_DECL (rtl, to);
7653 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7655 rtl = gen_const_mem (Pmode, rtl);
7656 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7658 SET_DECL_RTL (to, rtl);
7663 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7664 true if we require the result be a register.  Returns the MEM (or
   register copy) holding the imported address.  */
7667 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7672 gcc_assert (SYMBOL_REF_DECL (symbol));
7673 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7675 x = DECL_RTL (imp_decl);
7677 x = force_reg (Pmode, x);
7681 /* Try machine-dependent ways of modifying an illegitimate address
7682 to be legitimate. If we find one, return the new, valid address.
7683 This macro is used in only one place: `memory_address' in explow.c.
7685 OLDX is the address as it was before break_out_memory_refs was called.
7686 In some cases it is useful to look at this to decide what needs to be done.
7688 MODE and WIN are passed so that this macro can use
7689 GO_IF_LEGITIMATE_ADDRESS.
7691 It is always safe for this macro to do nothing. It exists to recognize
7692 opportunities to optimize the output.
7694 For the 80386, we handle X+REG by loading X into a register R and
7695 using R+REG. R will go in a general reg and indexing will be used.
7696 However, if REG is a broken-out memory address or multiplication,
7697 nothing needs to be done because REG can certainly go in a general reg.
7699 When -fpic is used, special handling is needed for symbolic references.
7700 See comments by legitimize_pic_address in i386.c for details. */
7703 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
   /* TLS symbols first: they need model-specific expansion.  */
7708 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7710 return legitimize_tls_address (x, (enum tls_model) log, false);
7711 if (GET_CODE (x) == CONST
7712 && GET_CODE (XEXP (x, 0)) == PLUS
7713 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7714 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7716 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7717 (enum tls_model) log, false);
7718 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7721 if (flag_pic && SYMBOLIC_CONST (x))
7722 return legitimize_pic_address (x, 0);
7724 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7726 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7727 return legitimize_dllimport_symbol (x, true);
7728 if (GET_CODE (x) == CONST
7729 && GET_CODE (XEXP (x, 0)) == PLUS
7730 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7731 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7733 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7734 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7738 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7739 if (GET_CODE (x) == ASHIFT
7740 && CONST_INT_P (XEXP (x, 1))
7741 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7744 log = INTVAL (XEXP (x, 1));
7745 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7746 GEN_INT (1 << log));
7749 if (GET_CODE (x) == PLUS)
7751 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7753 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7754 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7755 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7758 log = INTVAL (XEXP (XEXP (x, 0), 1));
7759 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7760 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7761 GEN_INT (1 << log));
7764 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7765 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7766 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7769 log = INTVAL (XEXP (XEXP (x, 1), 1))
7770 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7771 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7772 GEN_INT (1 << log));
7775 /* Put multiply first if it isn't already. */
7776 if (GET_CODE (XEXP (x, 1)) == MULT)
7778 rtx tmp = XEXP (x, 0);
7779 XEXP (x, 0) = XEXP (x, 1);
7784 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7785 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7786 created by virtual register instantiation, register elimination, and
7787 similar optimizations. */
7788 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7791 x = gen_rtx_PLUS (Pmode,
7792 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7793 XEXP (XEXP (x, 1), 0)),
7794 XEXP (XEXP (x, 1), 1));
7798 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7799 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7800 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7801 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7802 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7803 && CONSTANT_P (XEXP (x, 1)))
7806 rtx other = NULL_RTX;
7808 if (CONST_INT_P (XEXP (x, 1)))
7810 constant = XEXP (x, 1);
7811 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7813 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7815 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7816 other = XEXP (x, 1);
7824 x = gen_rtx_PLUS (Pmode,
7825 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7826 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7827 plus_constant (other, INTVAL (constant)));
   /* After each canonicalization round, stop as soon as the address
      becomes legitimate.  */
7831 if (changed && legitimate_address_p (mode, x, FALSE))
7834 if (GET_CODE (XEXP (x, 0)) == MULT)
7837 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7840 if (GET_CODE (XEXP (x, 1)) == MULT)
7843 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7847 && REG_P (XEXP (x, 1))
7848 && REG_P (XEXP (x, 0)))
7851 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7854 x = legitimize_pic_address (x, 0);
7857 if (changed && legitimate_address_p (mode, x, FALSE))
   /* Last resort: force one addend into a fresh register.  */
7860 if (REG_P (XEXP (x, 0)))
7862 rtx temp = gen_reg_rtx (Pmode);
7863 rtx val = force_operand (XEXP (x, 1), temp);
7865 emit_move_insn (temp, val);
7871 else if (REG_P (XEXP (x, 1)))
7873 rtx temp = gen_reg_rtx (Pmode);
7874 rtx val = force_operand (XEXP (x, 0), temp);
7876 emit_move_insn (temp, val);
7886 /* Print an integer constant expression in assembler syntax. Addition
7887 and subtraction are the only arithmetic that may appear in these
7888 expressions. FILE is the stdio stream to write to, X is the rtx, and
7889 CODE is the operand print code from the output string. */
7892 output_pic_addr_const (FILE *file, rtx x, int code)
7896 switch (GET_CODE (x))
7899 gcc_assert (flag_pic);
7904 if (! TARGET_MACHO || TARGET_64BIT)
7905 output_addr_const (file, x);
7908 const char *name = XSTR (x, 0);
7910 /* Mark the decl as referenced so that cgraph will
7911 output the function. */
7912 if (SYMBOL_REF_DECL (x))
7913 mark_decl_referenced (SYMBOL_REF_DECL (x));
7916 if (MACHOPIC_INDIRECT
7917 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7918 name = machopic_indirection_name (x, /*stub_p=*/true);
7920 assemble_name (file, name);
   /* 'P' print code requests a @PLT-suffixed call target for
      non-local symbols on ELF targets.  */
7922 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7923 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7924 fputs ("@PLT", file);
7931 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7932 assemble_name (asm_out_file, buf);
7936 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7940 /* This used to output parentheses around the expression,
7941 but that does not work on the 386 (either ATT or BSD assembler). */
7942 output_pic_addr_const (file, XEXP (x, 0), code);
7946 if (GET_MODE (x) == VOIDmode)
7948 /* We can use %d if the number is <32 bits and positive. */
7949 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7950 fprintf (file, "0x%lx%08lx",
7951 (unsigned long) CONST_DOUBLE_HIGH (x),
7952 (unsigned long) CONST_DOUBLE_LOW (x));
7954 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7957 /* We can't handle floating point constants;
7958 PRINT_OPERAND must handle them. */
7959 output_operand_lossage ("floating constant misused");
7963 /* Some assemblers need integer constants to appear first. */
7964 if (CONST_INT_P (XEXP (x, 0)))
7966 output_pic_addr_const (file, XEXP (x, 0), code);
7968 output_pic_addr_const (file, XEXP (x, 1), code);
7972 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7973 output_pic_addr_const (file, XEXP (x, 1), code);
7975 output_pic_addr_const (file, XEXP (x, 0), code);
   /* MINUS: bracket style differs between AT&T and Intel dialects.  */
7981 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7982 output_pic_addr_const (file, XEXP (x, 0), code);
7984 output_pic_addr_const (file, XEXP (x, 1), code);
7986 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
   /* UNSPEC: emit the wrapped operand followed by its relocation
      suffix (@GOT, @GOTOFF, @TPOFF, ...).  */
7990 gcc_assert (XVECLEN (x, 0) == 1);
7991 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7992 switch (XINT (x, 1))
7995 fputs ("@GOT", file);
7998 fputs ("@GOTOFF", file);
8001 fputs ("@PLTOFF", file);
8003 case UNSPEC_GOTPCREL:
8004 fputs ("@GOTPCREL(%rip)", file);
8006 case UNSPEC_GOTTPOFF:
8007 /* FIXME: This might be @TPOFF in Sun ld too. */
8008 fputs ("@GOTTPOFF", file);
8011 fputs ("@TPOFF", file);
8015 fputs ("@TPOFF", file);
8017 fputs ("@NTPOFF", file);
8020 fputs ("@DTPOFF", file);
8022 case UNSPEC_GOTNTPOFF:
8024 fputs ("@GOTTPOFF(%rip)", file);
8026 fputs ("@GOTNTPOFF", file);
8028 case UNSPEC_INDNTPOFF:
8029 fputs ("@INDNTPOFF", file);
8032 output_operand_lossage ("invalid UNSPEC as operand");
8038 output_operand_lossage ("invalid expression as operand");
8042 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8043 We need to emit DTP-relative relocations.  SIZE selects the directive
   width; X is the symbol being relocated.  */
8045 static void ATTRIBUTE_UNUSED
8046 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8048 fputs (ASM_LONG, file);
8049 output_addr_const (file, x);
8050 fputs ("@DTPOFF", file);
   /* NOTE(review): the ", 0" pads the 8-byte case on a 32-bit directive —
      the surrounding switch on SIZE is elided here; confirm upstream.  */
8056 fputs (", 0", file);
8063 /* In the name of slightly smaller debug output, and to cater to
8064 general assembler lossage, recognize PIC+GOTOFF and turn it back
8065 into a direct symbol reference.
8067 On Darwin, this is necessary to avoid a crash, because Darwin
8068 has a different PIC label for each routine but the DWARF debugging
8069 information is not associated with any particular routine, so it's
8070 necessary to remove references to the PIC label from RTL stored by
8071 the DWARF output code. */
8074 ix86_delegitimize_address (rtx orig_x)
8077 /* reg_addend is NULL or a multiple of some register. */
8078 rtx reg_addend = NULL_RTX;
8079 /* const_addend is NULL or a const_int. */
8080 rtx const_addend = NULL_RTX;
8081 /* This is the result, or NULL. */
8082 rtx result = NULL_RTX;
   /* 64-bit: a GOTPCREL load delegitimizes directly to its symbol.  */
8089 if (GET_CODE (x) != CONST
8090 || GET_CODE (XEXP (x, 0)) != UNSPEC
8091 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8094 return XVECEXP (XEXP (x, 0), 0, 0);
8097 if (GET_CODE (x) != PLUS
8098 || GET_CODE (XEXP (x, 1)) != CONST)
8101 if (REG_P (XEXP (x, 0))
8102 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8103 /* %ebx + GOT/GOTOFF */
8105 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8107 /* %ebx + %reg * scale + GOT/GOTOFF */
8108 reg_addend = XEXP (x, 0);
8109 if (REG_P (XEXP (reg_addend, 0))
8110 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8111 reg_addend = XEXP (reg_addend, 1);
8112 else if (REG_P (XEXP (reg_addend, 1))
8113 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8114 reg_addend = XEXP (reg_addend, 0);
   /* The surviving addend must be a plain reg or a scaled index.  */
8117 if (!REG_P (reg_addend)
8118 && GET_CODE (reg_addend) != MULT
8119 && GET_CODE (reg_addend) != ASHIFT)
8125 x = XEXP (XEXP (x, 1), 0);
8126 if (GET_CODE (x) == PLUS
8127 && CONST_INT_P (XEXP (x, 1)))
8129 const_addend = XEXP (x, 1);
8133 if (GET_CODE (x) == UNSPEC
8134 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8135 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8136 result = XVECEXP (x, 0, 0);
8138 if (TARGET_MACHO && darwin_local_data_pic (x)
8140 result = XEXP (x, 0);
   /* Re-attach the stripped constant and register addends.  */
8146 result = gen_rtx_PLUS (Pmode, result, const_addend);
8148 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8152 /* If X is a machine specific address (i.e. a symbol or label being
8153 referenced as a displacement from the GOT implemented using an
8154 UNSPEC), then return the base term. Otherwise return X. */
8157 ix86_find_base_term (rtx x)
   /* 64-bit: peel CONST (PLUS (UNSPEC_GOTPCREL ...) offset) wrappers.  */
8163 if (GET_CODE (x) != CONST)
8166 if (GET_CODE (term) == PLUS
8167 && (CONST_INT_P (XEXP (term, 1))
8168 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8169 term = XEXP (term, 0);
8170 if (GET_CODE (term) != UNSPEC
8171 || XINT (term, 1) != UNSPEC_GOTPCREL)
8174 term = XVECEXP (term, 0, 0);
8176 if (GET_CODE (term) != SYMBOL_REF
8177 && GET_CODE (term) != LABEL_REF)
   /* 32-bit path: fall back to full delegitimization.  */
8183 term = ix86_delegitimize_address (x);
8185 if (GET_CODE (term) != SYMBOL_REF
8186 && GET_CODE (term) != LABEL_REF)
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.
   Emits the instruction condition-code suffix (e.g. "a", "ae", "p")
   for CODE in condition-code mode MODE to FILE, optionally with the
   condition reversed.  The big switch over CODE is mostly elided in
   this extract; only scattered arms remain.  */
8193 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8198 if (mode == CCFPmode || mode == CCFPUmode)
8200 enum rtx_code second_code, bypass_code;
/* FP compares must be reducible to a single integer condition here.  */
8201 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8202 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8203 code = ix86_fp_compare_code_to_integer (code);
8207 code = reverse_condition (code);
8258 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8262 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8263 Those same assemblers have the same but opposite lossage on cmov. */
8265 suffix = fp ? "nbe" : "a";
8266 else if (mode == CCCmode)
8289 gcc_assert (mode == CCmode || mode == CCCmode);
8311 gcc_assert (mode == CCmode || mode == CCCmode);
8312 suffix = fp ? "nb" : "ae";
8315 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8322 else if (mode == CCCmode)
8323 suffix = fp ? "nb" : "ae";
8328 suffix = fp ? "u" : "p";
8331 suffix = fp ? "nu" : "np";
/* Finally write the chosen suffix text.  */
8336 fputs (suffix, file);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  */
8339 /* Print the name of register X to FILE based on its machine mode and number.
8340 If CODE is 'w', pretend the mode is HImode.
8341 If CODE is 'b', pretend the mode is QImode.
8342 If CODE is 'k', pretend the mode is SImode.
8343 If CODE is 'q', pretend the mode is DImode.
8344 If CODE is 'h', pretend the reg is the 'high' byte register.
8345 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8348 print_reg (rtx x, int code, FILE *file)
/* Internal-only registers must never reach assembly output.  */
8350 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8351 && REGNO (x) != FRAME_POINTER_REGNUM
8352 && REGNO (x) != FLAGS_REG
8353 && REGNO (x) != FPSR_REG
8354 && REGNO (x) != FPCR_REG);
8356 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Translate the override letter into an operand byte-size in CODE
   (the assignments between these branches are elided in this
   extract); afterwards CODE holds a size, not a letter.  */
8359 if (code == 'w' || MMX_REG_P (x))
8361 else if (code == 'b')
8363 else if (code == 'k')
8365 else if (code == 'q')
8367 else if (code == 'y')
8369 else if (code == 'h')
8372 code = GET_MODE_SIZE (GET_MODE (x));
8374 /* Irritatingly, AMD extended registers use different naming convention
8375 from the normal registers. */
8376 if (REX_INT_REG_P (x))
8378 gcc_assert (TARGET_64BIT);
8382 error ("extended registers have no high halves");
8385 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8388 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8391 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8394 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8397 error ("unsupported operand size for extended register");
8405 if (STACK_TOP_P (x))
8407 fputs ("st(0)", file);
/* Word/dword/qword integer regs get an 'e' or 'r' prefix letter.  */
8414 if (! ANY_FP_REG_P (x))
8415 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8420 fputs (hi_reg_name[REGNO (x)], file);
8423 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8425 fputs (qi_reg_name[REGNO (x)], file);
8428 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8430 fputs (qi_high_reg_name[REGNO (x)], file);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.
   for_each_rtx callback: records the first local-dynamic TLS symbol
   found into cfun->machine->some_ld_name (return values elided).  */
8437 /* Locate some local-dynamic symbol still in use by this function
8438 so that we can print its name in some tls_local_dynamic_base
8442 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8446 if (GET_CODE (x) == SYMBOL_REF
8447 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8449 cfun->machine->some_ld_name = XSTR (x, 0);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.
   Returns the cached local-dynamic TLS symbol name, scanning the insn
   stream via get_some_local_dynamic_name_1 on first use.  */
8457 get_some_local_dynamic_name (void)
8461 if (cfun->machine->some_ld_name)
8462 return cfun->machine->some_ld_name;
8464 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8466 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8467 return cfun->machine->some_ld_name;
/* NOTE(review): sampled listing -- most of the original switch arms
   and case labels are missing between the numbered lines; code kept
   byte-identical.  This is the PRINT_OPERAND worker: it prints
   operand X to FILE according to the modifier letter CODE documented
   in the header comment below.  */
8473 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8474 C -- print opcode suffix for set/cmov insn.
8475 c -- like C, but print reversed condition
8476 F,f -- likewise, but for floating-point.
8477 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8479 R -- print the prefix for register names.
8480 z -- print the opcode suffix for the size of the current operand.
8481 * -- print a star (in certain assembler syntax)
8482 A -- print an absolute memory reference.
8483 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8484 s -- print a shift double count, followed by the assemblers argument
8486 b -- print the QImode name of the register for the indicated operand.
8487 %b0 would print %al if operands[0] is reg 0.
8488 w -- likewise, print the HImode name of the register.
8489 k -- likewise, print the SImode name of the register.
8490 q -- likewise, print the DImode name of the register.
8491 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8492 y -- print "st(0)" instead of "st" as a register.
8493 D -- print condition for SSE cmp instruction.
8494 P -- if PIC, print an @PLT suffix.
8495 X -- don't print any sort of PIC '@' suffix for a symbol.
8496 & -- print some in-use local-dynamic symbol name.
8497 H -- print a memory address offset by 8; used for sse high-parts
8501 print_operand (FILE *file, rtx x, int code)
8508 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit the cached local-dynamic TLS symbol name.  */
8513 assemble_name (file, get_some_local_dynamic_name ());
8517 switch (ASSEMBLER_DIALECT)
8524 /* Intel syntax. For absolute addresses, registers should not
8525 be surrounded by braces. */
8529 PRINT_OPERAND (file, x, 0);
8539 PRINT_OPERAND (file, x, 0);
/* The size-suffix letters (L/W/B/Q/S/T) only print in AT&T syntax;
   the putc calls between these guards are elided in this extract.  */
8544 if (ASSEMBLER_DIALECT == ASM_ATT)
8549 if (ASSEMBLER_DIALECT == ASM_ATT)
8554 if (ASSEMBLER_DIALECT == ASM_ATT)
8559 if (ASSEMBLER_DIALECT == ASM_ATT)
8564 if (ASSEMBLER_DIALECT == ASM_ATT)
8569 if (ASSEMBLER_DIALECT == ASM_ATT)
8574 /* 387 opcodes don't get size suffixes if the operands are
8576 if (STACK_REG_P (x))
8579 /* Likewise if using Intel opcodes. */
8580 if (ASSEMBLER_DIALECT == ASM_INTEL)
8583 /* This is the size of op from size of operand. */
8584 switch (GET_MODE_SIZE (GET_MODE (x)))
8593 #ifdef HAVE_GAS_FILDS_FISTS
8603 if (GET_MODE (x) == SFmode)
8618 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8620 #ifdef GAS_MNEMONICS
/* 's': shift-double count plus separator.  */
8646 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8648 PRINT_OPERAND (file, x, 0);
8654 /* Little bit of braindamage here. The SSE compare instructions
8655 does use completely different names for the comparisons that the
8656 fp conditional moves. */
8657 switch (GET_CODE (x))
8672 fputs ("unord", file);
8676 fputs ("neq", file);
8680 fputs ("nlt", file);
8684 fputs ("nle", file);
8687 fputs ("ord", file);
8694 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8695 if (ASSEMBLER_DIALECT == ASM_ATT)
8697 switch (GET_MODE (x))
8699 case HImode: putc ('w', file); break;
8701 case SFmode: putc ('l', file); break;
8703 case DFmode: putc ('q', file); break;
8704 default: gcc_unreachable ();
/* 'C'/'F': condition suffix, straight ('c'/'f' reverse it below).  */
8711 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8714 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8715 if (ASSEMBLER_DIALECT == ASM_ATT)
8718 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8721 /* Like above, but reverse condition */
8723 /* Check to see if argument to %c is really a constant
8724 and not a condition code which needs to be reversed. */
8725 if (!COMPARISON_P (x))
8727 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8730 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8733 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8734 if (ASSEMBLER_DIALECT == ASM_ATT)
8737 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'H': address the high 8 bytes of the operand (SSE high parts).  */
8741 /* It doesn't actually matter what mode we use here, as we're
8742 only going to use this for printing. */
8743 x = adjust_address_nv (x, DImode, 8);
/* Branch-prediction hint prefixes, emitted only when the static
   prediction disagrees with the CPU's forward-not-taken default.  */
8750 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8753 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8756 int pred_val = INTVAL (XEXP (x, 0));
8758 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8759 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8761 int taken = pred_val > REG_BR_PROB_BASE / 2;
8762 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8764 /* Emit hints only in the case default branch prediction
8765 heuristics would fail. */
8766 if (taken != cputaken)
8768 /* We use 3e (DS) prefix for taken branches and
8769 2e (CS) prefix for not taken branches. */
8771 fputs ("ds ; ", file);
8773 fputs ("cs ; ", file);
8780 output_operand_lossage ("invalid operand code '%c'", code);
/* Past the modifier switch: print the operand itself by kind.  */
8785 print_reg (x, code, file);
8789 /* No `byte ptr' prefix for call instructions. */
8790 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8793 switch (GET_MODE_SIZE (GET_MODE (x)))
8795 case 1: size = "BYTE"; break;
8796 case 2: size = "WORD"; break;
8797 case 4: size = "DWORD"; break;
8798 case 8: size = "QWORD"; break;
8799 case 12: size = "XWORD"; break;
8800 case 16: size = "XMMWORD"; break;
8805 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8808 else if (code == 'w')
8810 else if (code == 'k')
8814 fputs (" PTR ", file);
8818 /* Avoid (%rip) for call operands. */
8819 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8820 && !CONST_INT_P (x))
8821 output_addr_const (file, x);
8822 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8823 output_operand_lossage ("invalid constraints for operand");
8828 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
/* SFmode immediates print as their 32-bit target image in hex.  */
8833 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8834 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8836 if (ASSEMBLER_DIALECT == ASM_ATT)
8838 fprintf (file, "0x%08lx", l);
8841 /* These float cases don't actually occur as immediate operands. */
8842 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8846 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8847 fprintf (file, "%s", dstr);
8850 else if (GET_CODE (x) == CONST_DOUBLE
8851 && GET_MODE (x) == XFmode)
8855 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8856 fprintf (file, "%s", dstr);
8861 /* We have patterns that allow zero sets of memory, for instance.
8862 In 64-bit mode, we should probably support all 8-byte vectors,
8863 since we can in fact encode that into an immediate. */
8864 if (GET_CODE (x) == CONST_VECTOR)
8866 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediate marker ('$' in AT&T, "OFFSET FLAT:" in Intel), then the
   constant value itself, via the PIC-aware printer when needed.  */
8872 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8874 if (ASSEMBLER_DIALECT == ASM_ATT)
8877 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8878 || GET_CODE (x) == LABEL_REF)
8880 if (ASSEMBLER_DIALECT == ASM_ATT)
8883 fputs ("OFFSET FLAT:", file);
8886 if (CONST_INT_P (x))
8887 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8889 output_pic_addr_const (file, x, code);
8891 output_addr_const (file, x);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  PRINT_OPERAND_ADDRESS
   worker: decomposes ADDR into base/index/disp/scale/segment and
   prints it in AT&T or Intel syntax.  */
8895 /* Print a memory operand whose address is ADDR. */
8898 print_operand_address (FILE *file, rtx addr)
8900 struct ix86_address parts;
8901 rtx base, index, disp;
8903 int ok = ix86_decompose_address (addr, &parts);
8908 index = parts.index;
8910 scale = parts.scale;
/* Non-default segment override (fs:/gs:) goes first.  */
8918 if (USER_LABEL_PREFIX[0] == 0)
8920 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8926 if (!base && !index)
8928 /* Displacement only requires special attention. */
8930 if (CONST_INT_P (disp))
8932 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8934 if (USER_LABEL_PREFIX[0] == 0)
8936 fputs ("ds:", file);
8938 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8941 output_pic_addr_const (file, disp, 0);
8943 output_addr_const (file, disp);
8945 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8948 if (GET_CODE (disp) == CONST
8949 && GET_CODE (XEXP (disp, 0)) == PLUS
8950 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8951 disp = XEXP (XEXP (disp, 0), 0);
8952 if (GET_CODE (disp) == LABEL_REF
8953 || (GET_CODE (disp) == SYMBOL_REF
8954 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8955 fputs ("(%rip)", file);
/* AT&T form: disp(base,index,scale).  */
8960 if (ASSEMBLER_DIALECT == ASM_ATT)
8965 output_pic_addr_const (file, disp, 0);
8966 else if (GET_CODE (disp) == LABEL_REF)
8967 output_asm_label (disp);
8969 output_addr_const (file, disp);
8974 print_reg (base, 0, file);
8978 print_reg (index, 0, file);
8980 fprintf (file, ",%d", scale);
/* Intel form: [base + index*scale + disp].  */
8986 rtx offset = NULL_RTX;
8990 /* Pull out the offset of a symbol; print any symbol itself. */
8991 if (GET_CODE (disp) == CONST
8992 && GET_CODE (XEXP (disp, 0)) == PLUS
8993 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8995 offset = XEXP (XEXP (disp, 0), 1);
8996 disp = gen_rtx_CONST (VOIDmode,
8997 XEXP (XEXP (disp, 0), 0));
9001 output_pic_addr_const (file, disp, 0);
9002 else if (GET_CODE (disp) == LABEL_REF)
9003 output_asm_label (disp);
9004 else if (CONST_INT_P (disp))
9007 output_addr_const (file, disp);
9013 print_reg (base, 0, file);
9016 if (INTVAL (offset) >= 0)
9018 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9022 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9029 print_reg (index, 0, file);
9031 fprintf (file, "*%d", scale);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.
   Prints TLS-related UNSPEC address constants as the corresponding
   assembler relocation suffixes (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF,
   @GOTNTPOFF, @INDNTPOFF); the return statements are elided here.  */
9039 output_addr_const_extra (FILE *file, rtx x)
9043 if (GET_CODE (x) != UNSPEC)
9046 op = XVECEXP (x, 0, 0);
9047 switch (XINT (x, 1))
9049 case UNSPEC_GOTTPOFF:
9050 output_addr_const (file, op);
9051 /* FIXME: This might be @TPOFF in Sun ld. */
9052 fputs ("@GOTTPOFF", file);
9055 output_addr_const (file, op);
9056 fputs ("@TPOFF", file);
9059 output_addr_const (file, op);
9061 fputs ("@TPOFF", file);
9063 fputs ("@NTPOFF", file);
9066 output_addr_const (file, op);
9067 fputs ("@DTPOFF", file);
9069 case UNSPEC_GOTNTPOFF:
9070 output_addr_const (file, op);
/* 64-bit uses the RIP-relative spelling; 32-bit the GOTNTPOFF one
   (the TARGET_64BIT guard between these lines is elided).  */
9072 fputs ("@GOTTPOFF(%rip)", file);
9074 fputs ("@GOTNTPOFF", file);
9076 case UNSPEC_INDNTPOFF:
9077 output_addr_const (file, op);
9078 fputs ("@INDNTPOFF", file);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  */
9088 /* Split one or more DImode RTL references into pairs of SImode
9089 references. The RTL can be REG, offsettable MEM, integer constant, or
9090 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9091 split and "num" is its length. lo_half and hi_half are output arrays
9092 that parallel "operands". */
9095 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9099 rtx op = operands[num];
9101 /* simplify_subreg refuse to split volatile memory addresses,
9102 but we still have to handle it. */
/* MEM path: address arithmetic at byte offsets 0 and 4.  */
9105 lo_half[num] = adjust_address (op, SImode, 0);
9106 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM path: subregs; VOIDmode constants are treated as DImode.  */
9110 lo_half[num] = simplify_gen_subreg (SImode, op,
9111 GET_MODE (op) == VOIDmode
9112 ? DImode : GET_MODE (op), 0);
9113 hi_half[num] = simplify_gen_subreg (SImode, op,
9114 GET_MODE (op) == VOIDmode
9115 ? DImode : GET_MODE (op), 4);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  TImode analogue of
   split_di: halves are DImode at byte offsets 0 and 8.  */
9119 /* Split one or more TImode RTL references into pairs of DImode
9120 references. The RTL can be REG, offsettable MEM, integer constant, or
9121 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9122 split and "num" is its length. lo_half and hi_half are output arrays
9123 that parallel "operands". */
9126 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9130 rtx op = operands[num];
9132 /* simplify_subreg refuse to split volatile memory addresses, but we
9133 still have to handle it. */
9136 lo_half[num] = adjust_address (op, DImode, 0);
9137 hi_half[num] = adjust_address (op, DImode, 8);
9141 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9142 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
/* NOTE(review): sampled listing -- many lines (including the strcpy
   of the base mnemonic into `buf` and several case labels) are
   missing between the numbered lines; code kept byte-identical.  */
9147 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9148 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9149 is the expression of the binary operation. The output may either be
9150 emitted here, or returned to the caller, like all output_* functions.
9152 There is no guarantee that the operands are the same mode, as they
9153 might be within FLOAT or FLOAT_EXTEND expressions. */
9155 #ifndef SYSV386_COMPAT
9156 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9157 wants to fix the assemblers because that causes incompatibility
9158 with gcc. No-one wants to fix gcc because that causes
9159 incompatibility with assemblers... You can use the option of
9160 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9161 #define SYSV386_COMPAT 1
9165 output_387_binary_op (rtx insn, rtx *operands)
9167 static char buf[30];
9170 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9172 #ifdef ENABLE_CHECKING
9173 /* Even if we do not want to check the inputs, this documents input
9174 constraints. Which helps in understanding the following code. */
9175 if (STACK_REG_P (operands[0])
9176 && ((REG_P (operands[1])
9177 && REGNO (operands[0]) == REGNO (operands[1])
9178 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9179 || (REG_P (operands[2])
9180 && REGNO (operands[0]) == REGNO (operands[2])
9181 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9182 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9185 gcc_assert (is_sse);
/* First switch: pick the base mnemonic; integer-mode operands select
   the fi* (integer-operand) forms.  The strcpy targets are elided.  */
9188 switch (GET_CODE (operands[3]))
9191 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9192 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9200 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9201 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9209 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9210 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9218 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9219 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: scalar single/double suffix and operand template.  */
9233 if (GET_MODE (operands[0]) == SFmode)
9234 strcat (buf, "ss\t{%2, %0|%0, %2}");
9236 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the operand/pop suffix by commutativity, which
   operand dies, and which register is the stack top.  */
9241 switch (GET_CODE (operands[3]))
9245 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9247 rtx temp = operands[2];
9248 operands[2] = operands[1];
9252 /* know operands[0] == operands[1]. */
9254 if (MEM_P (operands[2]))
9260 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9262 if (STACK_TOP_P (operands[0]))
9263 /* How is it that we are storing to a dead operand[2]?
9264 Well, presumably operands[1] is dead too. We can't
9265 store the result to st(0) as st(0) gets popped on this
9266 instruction. Instead store to operands[2] (which I
9267 think has to be st(1)). st(1) will be popped later.
9268 gcc <= 2.8.1 didn't have this check and generated
9269 assembly code that the Unixware assembler rejected. */
9270 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9272 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9276 if (STACK_TOP_P (operands[0]))
9277 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9279 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9284 if (MEM_P (operands[1]))
9290 if (MEM_P (operands[2]))
9296 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9299 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9300 derived assemblers, confusingly reverse the direction of
9301 the operation for fsub{r} and fdiv{r} when the
9302 destination register is not st(0). The Intel assembler
9303 doesn't have this brain damage. Read !SYSV386_COMPAT to
9304 figure out what the hardware really does. */
9305 if (STACK_TOP_P (operands[0]))
9306 p = "{p\t%0, %2|rp\t%2, %0}";
9308 p = "{rp\t%2, %0|p\t%0, %2}";
9310 if (STACK_TOP_P (operands[0]))
9311 /* As above for fmul/fadd, we can't store to st(0). */
9312 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9314 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9319 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9322 if (STACK_TOP_P (operands[0]))
9323 p = "{rp\t%0, %1|p\t%1, %0}";
9325 p = "{p\t%1, %0|rp\t%0, %1}";
9327 if (STACK_TOP_P (operands[0]))
9328 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9330 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9335 if (STACK_TOP_P (operands[0]))
9337 if (STACK_TOP_P (operands[1]))
9338 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9340 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9343 else if (STACK_TOP_P (operands[1]))
9346 p = "{\t%1, %0|r\t%0, %1}";
9348 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9354 p = "{r\t%2, %0|\t%0, %2}";
9356 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  Mode-switching hook:
   returns which i387 control-word setting INSN requires.  */
9369 /* Return needed mode for entity in optimize_mode_switching pass. */
9372 ix86_mode_needed (int entity, rtx insn)
9374 enum attr_i387_cw mode;
9376 /* The mode UNINITIALIZED is used to store control word after a
9377 function call or ASM pattern. The mode ANY specify that function
9378 has no requirements on the control word and make no changes in the
9379 bits we are interested in. */
9382 || (NONJUMP_INSN_P (insn)
9383 && (asm_noperands (PATTERN (insn)) >= 0
9384 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9385 return I387_CW_UNINITIALIZED;
9387 if (recog_memoized (insn) < 0)
9390 mode = get_attr_i387_cw (insn);
/* The per-mode returns between these checks are elided; presumably
   each gates on the corresponding flag_* / entity test.  */
9395 if (mode == I387_CW_TRUNC)
9400 if (mode == I387_CW_FLOOR)
9405 if (mode == I387_CW_CEIL)
9410 if (mode == I387_CW_MASK_PM)
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  Stores the current x87
   control word, derives a modified copy for MODE (trunc/floor/ceil/
   mask-PM), and saves it in a dedicated stack slot for fldcw.  */
9421 /* Output code to initialize control word copies used by trunc?f?i and
9422 rounding patterns. CURRENT_MODE is set to current control word,
9423 while NEW_MODE is set to new control word. */
9426 emit_i387_cw_initialization (int mode)
9428 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9431 enum ix86_stack_slot slot;
9433 rtx reg = gen_reg_rtx (HImode);
9435 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9436 emit_move_insn (reg, copy_rtx (stored_mode));
/* Two encodings of the same edits: plain and/or on HImode when insv
   is unattractive, movsi_insv_1 on the rounding-control field
   otherwise.  Bits 0x0c00 are the x87 rounding-control field; 0x0020
   is the precision-exception mask bit.  */
9438 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9443 /* round toward zero (truncate) */
9444 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9445 slot = SLOT_CW_TRUNC;
9449 /* round down toward -oo */
9450 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9451 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9452 slot = SLOT_CW_FLOOR;
9456 /* round up toward +oo */
9457 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9458 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9459 slot = SLOT_CW_CEIL;
9462 case I387_CW_MASK_PM:
9463 /* mask precision exception for nearbyint() */
9464 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9465 slot = SLOT_CW_MASK_PM;
9477 /* round toward zero (truncate) */
9478 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9479 slot = SLOT_CW_TRUNC;
9483 /* round down toward -oo */
9484 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9485 slot = SLOT_CW_FLOOR;
9489 /* round up toward +oo */
9490 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9491 slot = SLOT_CW_CEIL;
9494 case I387_CW_MASK_PM:
9495 /* mask precision exception for nearbyint() */
9496 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9497 slot = SLOT_CW_MASK_PM;
9505 gcc_assert (slot < MAX_386_STACK_LOCALS);
9507 new_mode = assign_386_stack_local (HImode, slot);
9508 emit_move_insn (new_mode, reg);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  */
9511 /* Output code for INSN to convert a float to a signed int. OPERANDS
9512 are the insn operands. The output may be [HSD]Imode and the input
9513 operand may be [SDX]Fmode. */
9516 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9518 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9519 int dimode_p = GET_MODE (operands[0]) == DImode;
9520 int round_mode = get_attr_i387_cw (insn);
9522 /* Jump through a hoop or two for DImode, since the hardware has no
9523 non-popping instruction. We used to do this a different way, but
9524 that was somewhat fragile and broke with post-reload splitters. */
9525 if ((dimode_p || fisttp) && !stack_top_dies)
9526 output_asm_insn ("fld\t%y1", operands);
9528 gcc_assert (STACK_TOP_P (operands[1]));
9529 gcc_assert (MEM_P (operands[0]));
9530 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* SSE3 fisttp truncates regardless of the control word; otherwise
   bracket the store with fldcw loads of the rounding CW copies.  */
9533 output_asm_insn ("fisttp%z0\t%0", operands);
9536 if (round_mode != I387_CW_ANY)
9537 output_asm_insn ("fldcw\t%3", operands);
9538 if (stack_top_dies || dimode_p)
9539 output_asm_insn ("fistp%z0\t%0", operands);
9541 output_asm_insn ("fist%z0\t%0", operands);
9542 if (round_mode != I387_CW_ANY)
9543 output_asm_insn ("fldcw\t%2", operands);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  */
9549 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9550 have the values zero or one, indicates the ffreep insn's operand
9551 from the OPERANDS array. */
9554 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9556 if (TARGET_USE_FFREEP)
9557 #if HAVE_AS_IX86_FFREEP
9558 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit its raw encoding (0xdf 0xc0+i) as a
   .word directive, patching the register digit into the template.  */
9561 static char retval[] = ".word\t0xc_df";
9562 int regno = REGNO (operands[opno]);
9564 gcc_assert (FP_REGNO_P (regno));
9566 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Without TARGET_USE_FFREEP, a plain popping store suffices.  */
9571 return opno ? "fstp\t%y1" : "fstp\t%y0";
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines (including several alt[] entries); code kept
   byte-identical.  */
9575 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9576 should be used. UNORDERED_P is true when fucom should be used. */
9579 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9582 rtx cmp_op0, cmp_op1;
9583 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand positions differ between the eflags (fcomi) and fnstsw
   forms; the branch selecting between these pairs is elided.  */
9587 cmp_op0 = operands[0];
9588 cmp_op1 = operands[1];
9592 cmp_op0 = operands[1];
9593 cmp_op1 = operands[2];
/* SSE compares: [u]comiss / [u]comisd.  */
9598 if (GET_MODE (operands[0]) == SFmode)
9600 return "ucomiss\t{%1, %0|%0, %1}";
9602 return "comiss\t{%1, %0|%0, %1}";
9605 return "ucomisd\t{%1, %0|%0, %1}";
9607 return "comisd\t{%1, %0|%0, %1}";
9610 gcc_assert (STACK_TOP_P (cmp_op0));
9612 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, plus a pop via ffreep if st(0) dies.  */
9614 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9618 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9619 return output_387_ffreep (operands, 1);
9622 return "ftst\n\tfnstsw\t%0";
9625 if (STACK_REG_P (cmp_op1)
9627 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9628 && REGNO (cmp_op1) != FIRST_STACK_REG)
9630 /* If both the top of the 387 stack dies, and the other operand
9631 is also a stack register that dies, then this must be a
9632 `fcompp' float compare */
9636 /* There is no double popping fcomi variant. Fortunately,
9637 eflags is immune from the fstp's cc clobbering. */
9639 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9641 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9642 return output_387_ffreep (operands, 0);
9647 return "fucompp\n\tfnstsw\t%0";
9649 return "fcompp\n\tfnstsw\t%0";
9654 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9656 static const char * const alt[16] =
9658 "fcom%z2\t%y2\n\tfnstsw\t%0",
9659 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9660 "fucom%z2\t%y2\n\tfnstsw\t%0",
9661 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9663 "ficom%z2\t%y2\n\tfnstsw\t%0",
9664 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9668 "fcomi\t{%y1, %0|%0, %y1}",
9669 "fcomip\t{%y1, %0|%0, %y1}",
9670 "fucomi\t{%y1, %0|%0, %y1}",
9671 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index documented above and pick the template.  */
9682 mask = eflags_p << 3;
9683 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9684 mask |= unordered_p << 1;
9685 mask |= stack_top_dies;
9687 gcc_assert (mask < 16);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.
   Emits one absolute jump-table entry: a .long (or .quad in 64-bit,
   behind an elided guard) referencing local label VALUE.  */
9696 ix86_output_addr_vec_elt (FILE *file, int value)
9698 const char *directive = ASM_LONG;
9702 directive = ASM_QUAD;
9704 gcc_assert (!TARGET_64BIT);
9707 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.
   Emits one PIC (relative) jump-table entry, choosing between a
   label-difference, @GOTOFF, Mach-O function-base-relative, and
   GOT-symbol-relative spelling depending on target/assembler.  */
9711 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9713 const char *directive = ASM_LONG;
9716 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9717 directive = ASM_QUAD;
9719 gcc_assert (!TARGET_64BIT);
9721 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9722 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9723 fprintf (file, "%s%s%d-%s%d\n",
9724 directive, LPREFIX, value, LPREFIX, rel);
9725 else if (HAVE_AS_GOTOFF_IN_DATA)
9726 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9728 else if (TARGET_MACHO)
9730 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9731 machopic_output_function_base_name (file);
9732 fprintf(file, "\n");
9736 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9737 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  */
9740 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9744 ix86_expand_clear (rtx dest)
9748 /* We play register width games, which are only valid after reload. */
9749 gcc_assert (reload_completed);
9751 /* Avoid HImode and its attendant prefix byte. */
9752 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9753 dest = gen_rtx_REG (SImode, REGNO (dest));
9754 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9756 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9757 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* xor clobbers the flags register, so wrap the set in a PARALLEL
   with an explicit CC clobber.  */
9759 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9760 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.  */
9766 /* X is an unchanging MEM. If it is a constant pool reference, return
9767 the constant pool rtx, else NULL. */
9770 maybe_get_pool_constant (rtx x)
/* Strip PIC wrapping first so pool symbols are recognizable.  */
9772 x = ix86_delegitimize_address (XEXP (x, 0));
9774 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9775 return get_pool_constant (x);
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.
   Expander for mov<mode>: legitimizes TLS, dllimport and PIC source
   addresses, forces awkward constants into registers or memory, then
   emits the final SET.  operands[0]/[1] are dest/src (the assignments
   to op0/op1 are elided in this extract).  */
9781 ix86_expand_move (enum machine_mode mode, rtx operands[])
9783 int strict = (reload_in_progress || reload_completed);
9785 enum tls_model model;
9790 if (GET_CODE (op1) == SYMBOL_REF)
9792 model = SYMBOL_REF_TLS_MODEL (op1);
9795 op1 = legitimize_tls_address (op1, model, true);
9796 op1 = force_operand (op1, op0);
9800 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9801 && SYMBOL_REF_DLLIMPORT_P (op1))
9802 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus symbol const_int)): legitimize the symbol part, then
   re-add the constant addend.  */
9804 else if (GET_CODE (op1) == CONST
9805 && GET_CODE (XEXP (op1, 0)) == PLUS
9806 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9808 rtx addend = XEXP (XEXP (op1, 0), 1);
9809 rtx symbol = XEXP (XEXP (op1, 0), 0);
9812 model = SYMBOL_REF_TLS_MODEL (symbol);
9814 tmp = legitimize_tls_address (symbol, model, true);
9815 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9816 && SYMBOL_REF_DLLIMPORT_P (symbol))
9817 tmp = legitimize_dllimport_symbol (symbol, true);
9821 tmp = force_operand (tmp, NULL);
9822 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9823 op0, 1, OPTAB_DIRECT);
/* PIC: route symbolic sources through legitimize_pic_address (or the
   Mach-O equivalents on 32-bit Darwin).  */
9829 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9831 if (TARGET_MACHO && !TARGET_64BIT)
9836 rtx temp = ((reload_in_progress
9837 || ((op0 && REG_P (op0))
9839 ? op0 : gen_reg_rtx (Pmode));
9840 op1 = machopic_indirect_data_reference (op1, temp);
9841 op1 = machopic_legitimize_pic_address (op1, mode,
9842 temp == op1 ? 0 : temp);
9844 else if (MACHOPIC_INDIRECT)
9845 op1 = machopic_indirect_data_reference (op1, 0);
9853 op1 = force_reg (Pmode, op1);
9854 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9856 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
9857 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC path: avoid mem-to-mem and non-pushable push sources.  */
9866 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9867 || !push_operand (op0, mode))
9869 op1 = force_reg (mode, op1);
9871 if (push_operand (op0, mode)
9872 && ! general_no_elim_operand (op1, mode))
9873 op1 = copy_to_mode_reg (mode, op1);
9875 /* Force large constants in 64bit compilation into register
9876 to get them CSEed. */
9877 if (TARGET_64BIT && mode == DImode
9878 && immediate_operand (op1, mode)
9879 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9880 && !register_operand (op0, mode)
9881 && optimize && !reload_completed && !reload_in_progress)
9882 op1 = copy_to_mode_reg (mode, op1);
9884 if (FLOAT_MODE_P (mode))
9886 /* If we are loading a floating point constant to a register,
9887 force the value to memory now, since we'll get better code
9888 out the back end. */
9892 else if (GET_CODE (op1) == CONST_DOUBLE)
9894 op1 = validize_mem (force_const_mem (mode, op1));
9895 if (!register_operand (op0, mode))
9897 rtx temp = gen_reg_rtx (mode);
9898 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9899 emit_move_insn (op0, temp);
9906 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* NOTE(review): sampled listing -- lines are missing between the
   numbered lines; code kept byte-identical.
   Expander for vector moves: forces non-zero constants to memory,
   detours under-aligned 32-bit TImode moves through the misaligned
   path, and avoids mem-to-mem moves before emitting the SET.  */
9910 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9912 rtx op0 = operands[0], op1 = operands[1];
9913 unsigned int align = GET_MODE_ALIGNMENT (mode);
9915 /* Force constants other than zero into memory. We do not know how
9916 the instructions used to build constants modify the upper 64 bits
9917 of the register, once we have that information we may be able
9918 to handle some of them more efficiently. */
9919 if ((reload_in_progress | reload_completed) == 0
9920 && register_operand (op0, mode)
9921 && (CONSTANT_P (op1)
9922 || (GET_CODE (op1) == SUBREG
9923 && CONSTANT_P (SUBREG_REG (op1))))
9924 && standard_sse_constant_p (op1) <= 0)
9925 op1 = validize_mem (force_const_mem (mode, op1));
9927 /* TDmode values are passed as TImode on the stack. Timode values
9928 are moved via xmm registers, and moving them to stack can result in
9929 unaligned memory access. Use ix86_expand_vector_move_misalign()
9930 if memory operand is not aligned correctly. */
9931 if (can_create_pseudo_p ()
9932 && (mode == TImode) && !TARGET_64BIT
9933 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9934 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9938 /* ix86_expand_vector_move_misalign() does not like constants ... */
9939 if (CONSTANT_P (op1)
9940 || (GET_CODE (op1) == SUBREG
9941 && CONSTANT_P (SUBREG_REG (op1))))
9942 op1 = validize_mem (force_const_mem (mode, op1));
9944 /* ... nor both arguments in memory. */
9945 if (!register_operand (op0, mode)
9946 && !register_operand (op1, mode))
9947 op1 = force_reg (mode, op1);
9949 tmp[0] = op0; tmp[1] = op1;
9950 ix86_expand_vector_move_misalign (mode, tmp);
9954 /* Make operand1 a register if it isn't already. */
9955 if (can_create_pseudo_p ()
9956 && !register_operand (op0, mode)
9957 && !register_operand (op1, mode))
9959 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9963 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9966 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9967 straight to ix86_expand_vector_move. */
9968 /* Code generation for scalar reg-reg moves of single and double precision data:
9969 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9973 if (x86_sse_partial_reg_dependency == true)
9978 Code generation for scalar loads of double precision data:
9979 if (x86_sse_split_regs == true)
9980 movlpd mem, reg (gas syntax)
9984 Code generation for unaligned packed loads of single precision data
9985 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9986 if (x86_sse_unaligned_move_optimal)
9989 if (x86_sse_partial_reg_dependency == true)
10001 Code generation for unaligned packed loads of double precision data
10002 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10003 if (x86_sse_unaligned_move_optimal)
10006 if (x86_sse_split_regs == true)
/* Emit a possibly-unaligned vector move operands[1] -> operands[0],
   choosing movups/movdqu/loadlpd+loadhpd etc. per target tuning flags.
   NOTE(review): interior lines are elided in this listing; the
   optimize-for-size tests and several braces are not visible.  */
10019 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10028 /* If we're optimizing for size, movups is the smallest. */
10031 op0 = gen_lowpart (V4SFmode, op0);
10032 op1 = gen_lowpart (V4SFmode, op1);
10033 emit_insn (gen_sse_movups (op0, op1));
10037 /* ??? If we have typed data, then it would appear that using
10038 movdqu is the only way to get unaligned data loaded with
10040 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10042 op0 = gen_lowpart (V16QImode, op0);
10043 op1 = gen_lowpart (V16QImode, op1);
10044 emit_insn (gen_sse2_movdqu (op0, op1));
/* Unaligned V2DF load paths.  */
10048 if (TARGET_SSE2 && mode == V2DFmode)
10052 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10054 op0 = gen_lowpart (V2DFmode, op0);
10055 op1 = gen_lowpart (V2DFmode, op1);
10056 emit_insn (gen_sse2_movupd (op0, op1));
10060 /* When SSE registers are split into halves, we can avoid
10061 writing to the top half twice. */
10062 if (TARGET_SSE_SPLIT_REGS)
10064 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10069 /* ??? Not sure about the best option for the Intel chips.
10070 The following would seem to satisfy; the register is
10071 entirely cleared, breaking the dependency chain. We
10072 then store to the upper half, with a dependency depth
10073 of one. A rumor has it that Intel recommends two movsd
10074 followed by an unpacklpd, but this is unconfirmed. And
10075 given that the dependency depth of the unpacklpd would
10076 still be one, I'm not sure why this would be better. */
10077 zero = CONST0_RTX (V2DFmode);
/* Load the two DF halves separately: low via loadlpd, high via loadhpd.  */
10080 m = adjust_address (op1, DFmode, 0);
10081 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10082 m = adjust_address (op1, DFmode, 8);
10083 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* Unaligned V4SF (and fallback) load paths.  */
10087 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10089 op0 = gen_lowpart (V4SFmode, op0);
10090 op1 = gen_lowpart (V4SFmode, op1);
10091 emit_insn (gen_sse_movups (op0, op1));
10095 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10096 emit_move_insn (op0, CONST0_RTX (mode));
10098 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10100 if (mode != V4SFmode)
10101 op0 = gen_lowpart (V4SFmode, op0);
10102 m = adjust_address (op1, V2SFmode, 0);
10103 emit_insn (gen_sse_loadlps (op0, op0, m));
10104 m = adjust_address (op1, V2SFmode, 8);
10105 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Store side: destination is memory.  */
10108 else if (MEM_P (op0))
10110 /* If we're optimizing for size, movups is the smallest. */
10113 op0 = gen_lowpart (V4SFmode, op0);
10114 op1 = gen_lowpart (V4SFmode, op1);
10115 emit_insn (gen_sse_movups (op0, op1));
10119 /* ??? Similar to above, only less clear because of quote
10120 typeless stores unquote. */
10121 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10122 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10124 op0 = gen_lowpart (V16QImode, op0);
10125 op1 = gen_lowpart (V16QImode, op1);
10126 emit_insn (gen_sse2_movdqu (op0, op1));
/* Split V2DF stores into two scalar DF halves.  */
10130 if (TARGET_SSE2 && mode == V2DFmode)
10132 m = adjust_address (op0, DFmode, 0);
10133 emit_insn (gen_sse2_storelpd (m, op1));
10134 m = adjust_address (op0, DFmode, 8);
10135 emit_insn (gen_sse2_storehpd (m, op1));
10139 if (mode != V4SFmode)
10140 op1 = gen_lowpart (V4SFmode, op1);
10141 m = adjust_address (op0, V2SFmode, 0);
10142 emit_insn (gen_sse_storelps (m, op1));
10143 m = adjust_address (op0, V2SFmode, 8);
10144 emit_insn (gen_sse_storehps (m, op1));
/* Neither operand is memory: nothing misaligned to handle.  */
10148 gcc_unreachable ();
10151 /* Expand a push in MODE. This is some mode for which we do not support
10152 proper push instructions, at least from the registers that we expect
10153 the value to live in. */
/* Push X (of MODE) by explicitly decrementing the stack pointer and
   storing, for modes with no usable push instruction.  */
10156 ix86_expand_push (enum machine_mode mode, rtx x)
/* sp := sp - size, reusing sp as the destination when the optab allows.  */
10160 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10161 GEN_INT (-GET_MODE_SIZE (mode)),
10162 stack_pointer_rtx, 1, OPTAB_DIRECT);
10163 if (tmp != stack_pointer_rtx)
10164 emit_move_insn (stack_pointer_rtx, tmp);
/* Store X at the new top of stack.  */
10166 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10167 emit_move_insn (tmp, x);
10170 /* Helper function of ix86_fixup_binary_operands to canonicalize
10171 operand order. Returns true if the operands should be swapped. */
/* Decide whether src1/src2 of a commutative binary operation should be
   swapped so that src1 matches dst and constants/memory come second.
   NOTE(review): the return statements are elided in this listing.  */
10174 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10177 rtx dst = operands[0];
10178 rtx src1 = operands[1];
10179 rtx src2 = operands[2];
10181 /* If the operation is not commutative, we can't do anything. */
10182 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10185 /* Highest priority is that src1 should match dst. */
10186 if (rtx_equal_p (dst, src1))
10188 if (rtx_equal_p (dst, src2))
10191 /* Next highest priority is that immediate constants come second. */
10192 if (immediate_operand (src2, mode))
10194 if (immediate_operand (src1, mode))
10197 /* Lowest priority is that memory references should come second. */
10207 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10208 destination to use for the operation. If different from the true
10209 destination in operands[0], a copy operation will be required. */
/* Legitimize the operands of a binary operation so that they satisfy
   ix86_binary_operator_ok; returns the dst to use (may differ from
   operands[0], in which case the caller must copy back).  */
10212 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10215 rtx dst = operands[0];
10216 rtx src1 = operands[1];
10217 rtx src2 = operands[2];
10219 /* Canonicalize operand order. */
10220 if (ix86_swap_binary_operands_p (code, mode, operands))
10227 /* Both source operands cannot be in memory. */
10228 if (MEM_P (src1) && MEM_P (src2))
10230 /* Optimization: Only read from memory once. */
10231 if (rtx_equal_p (src1, src2))
10233 src2 = force_reg (mode, src2);
10237 src2 = force_reg (mode, src2);
10240 /* If the destination is memory, and we do not have matching source
10241 operands, do things in registers. */
10242 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10243 dst = gen_reg_rtx (mode);
10245 /* Source 1 cannot be a constant. */
10246 if (CONSTANT_P (src1))
10247 src1 = force_reg (mode, src1);
10249 /* Source 1 cannot be a non-matching memory. */
10250 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10251 src1 = force_reg (mode, src1);
/* Write the legitimized sources back for the caller.  */
10253 operands[1] = src1;
10254 operands[2] = src2;
10258 /* Similarly, but assume that the destination has already been
10259 set up properly. */
/* Like ix86_fixup_binary_operands, but assert that no copy-back to
   operands[0] is required (destination already set up properly).  */
10262 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10263 enum machine_mode mode, rtx operands[])
10265 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10266 gcc_assert (dst == operands[0]);
10269 /* Attempt to expand a binary operator. Make the expansion closer to the
10270 actual machine, then just general_operand, which will allow 3 separate
10271 memory references (one output, two input) in a single insn. */
/* Expand CODE (a binary operator) on operands[], emitting the insn
   with an explicit FLAGS_REG clobber (except during reload, where
   only PLUS — which has a non-clobbering form — is allowed).  */
10274 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10277 rtx src1, src2, dst, op, clob;
10279 dst = ix86_fixup_binary_operands (code, mode, operands);
10280 src1 = operands[1];
10281 src2 = operands[2];
10283 /* Emit the instruction. */
10285 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10286 if (reload_in_progress)
10288 /* Reload doesn't know about the flags register, and doesn't know that
10289 it doesn't want to clobber it. We can only do this with PLUS. */
10290 gcc_assert (code == PLUS);
/* Normal case: wrap the SET in a PARALLEL with a flags clobber.  */
10295 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10296 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10299 /* Fix up the destination if needed. */
10300 if (dst != operands[0])
10301 emit_move_insn (operands[0], dst);
10304 /* Return TRUE or FALSE depending on whether the binary operator meets the
10305 appropriate constraints. */
/* Predicate: do the operands of binary operator CODE already satisfy
   the machine constraints (no mem/mem, matching dst for memory dst,
   no constant src1)?  NOTE(review): the return statements themselves
   are elided in this listing.  */
10308 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10311 rtx dst = operands[0];
10312 rtx src1 = operands[1];
10313 rtx src2 = operands[2];
10315 /* Both source operands cannot be in memory. */
10316 if (MEM_P (src1) && MEM_P (src2))
10319 /* Canonicalize operand order for commutative operators. */
10320 if (ix86_swap_binary_operands_p (code, mode, operands))
10327 /* If the destination is memory, we must have a matching source operand. */
10328 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10331 /* Source 1 cannot be a constant. */
10332 if (CONSTANT_P (src1))
10335 /* Source 1 cannot be a non-matching memory. */
10336 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10342 /* Attempt to expand a unary operator. Make the expansion closer to the
10343 actual machine, then just general_operand, which will allow 2 separate
10344 memory references (one output, one input) in a single insn. */
/* Expand CODE (a unary operator) on operands[].  Emits a FLAGS_REG
   clobber for every code except NOT (which doesn't touch flags).  */
10347 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10350 int matching_memory;
10351 rtx src, dst, op, clob;
10356 /* If the destination is memory, and we do not have matching source
10357 operands, do things in registers. */
10358 matching_memory = 0;
10361 if (rtx_equal_p (dst, src))
10362 matching_memory = 1;
10364 dst = gen_reg_rtx (mode);
10367 /* When source operand is memory, destination must match. */
10368 if (MEM_P (src) && !matching_memory)
10369 src = force_reg (mode, src);
10371 /* Emit the instruction. */
10373 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10374 if (reload_in_progress || code == NOT)
10376 /* Reload doesn't know about the flags register, and doesn't know that
10377 it doesn't want to clobber it. */
10378 gcc_assert (code == NOT);
/* Otherwise wrap the SET in a PARALLEL with a flags clobber.  */
10383 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10384 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10387 /* Fix up the destination if needed. */
10388 if (dst != operands[0])
10389 emit_move_insn (operands[0], dst);
10392 /* Return TRUE or FALSE depending on whether the unary operator meets the
10393 appropriate constraints. */
/* Predicate: a unary operation is OK unless one operand is memory and
   source and destination don't match.  */
10396 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10397 enum machine_mode mode ATTRIBUTE_UNUSED,
10398 rtx operands[2] ATTRIBUTE_UNUSED)
10400 /* If one of operands is memory, source and destination must match. */
10401 if ((MEM_P (operands[0])
10402 || MEM_P (operands[1]))
10403 && ! rtx_equal_p (operands[0], operands[1]))
10408 /* Post-reload splitter for converting an SF or DFmode value in an
10409 SSE register into an unsigned SImode. */
/* Post-reload splitter: convert an SF/DF value held in an SSE register
   to unsigned SImode.  operands[] supply scratch vectors LARGE and
   ZERO_OR_TWO31, the INPUT, and the constant TWO31 (2**31).  The trick:
   subtract 2**31 when input >= 2**31, do a signed cvttp*2dq, then XOR
   the sign bit back in for the values that were reduced.  */
10412 ix86_split_convert_uns_si_sse (rtx operands[])
10414 enum machine_mode vecmode;
10415 rtx value, large, zero_or_two31, input, two31, x;
10417 large = operands[1];
10418 zero_or_two31 = operands[2];
10419 input = operands[3];
10420 two31 = operands[4];
10421 vecmode = GET_MODE (large);
10422 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10424 /* Load up the value into the low element. We must ensure that the other
10425 elements are valid floats -- zero is the easiest such value. */
10428 if (vecmode == V4SFmode)
10429 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10431 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Input already in a register: zero VALUE and move the scalar in.  */
10435 input = gen_rtx_REG (vecmode, REGNO (input));
10436 emit_move_insn (value, CONST0_RTX (vecmode));
10437 if (vecmode == V4SFmode)
10438 emit_insn (gen_sse_movss (value, value, input));
10440 emit_insn (gen_sse2_movsd (value, value, input));
10443 emit_move_insn (large, two31);
10444 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large := (2**31 <= value) as a full-width mask.  */
10446 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10447 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 := mask ? 2**31 : 0; then reduce value by it.  */
10449 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10450 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10452 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10453 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into just the sign bit (bit 31).  */
10455 large = gen_rtx_REG (V4SImode, REGNO (large));
10456 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
/* Signed truncating convert, then restore the high bit via XOR.  */
10458 x = gen_rtx_REG (V4SImode, REGNO (value));
10459 if (vecmode == V4SFmode)
10460 emit_insn (gen_sse2_cvttps2dq (x, value));
10462 emit_insn (gen_sse2_cvttpd2dq (x, value));
10465 emit_insn (gen_xorv4si3 (value, value, large));
10468 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10469 Expects the 64-bit DImode to be supplied in a pair of integral
10470 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10471 -mfpmath=sse, !optimize_size only. */
/* Convert an unsigned DImode value INPUT into DFmode TARGET using only
   SSE: interleave the two 32-bit halves with the exponent words of
   0x1.0p52/0x1.0p84, subtract those biases, and add the two DF halves.
   Requires SSE2; uses SSE3 haddpd when available.  */
10474 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10476 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10477 rtx int_xmm, fp_xmm;
10478 rtx biases, exponents;
/* Get the 64-bit integer into the low half of an XMM register, by
   whichever path the target prefers.  */
10481 int_xmm = gen_reg_rtx (V4SImode);
10482 if (TARGET_INTER_UNIT_MOVES)
10483 emit_insn (gen_movdi_to_sse (int_xmm, input));
10484 else if (TARGET_SSE_SPLIT_REGS)
10486 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10487 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10491 x = gen_reg_rtx (V2DImode);
10492 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10493 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words: 0x43300000 = high word of 0x1.0p52,
   0x45300000 = high word of 0x1.0p84.  */
10496 x = gen_rtx_CONST_VECTOR (V4SImode,
10497 gen_rtvec (4, GEN_INT (0x43300000UL),
10498 GEN_INT (0x45300000UL),
10499 const0_rtx, const0_rtx));
10500 exponents = validize_mem (force_const_mem (V4SImode, x));
10502 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10503 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10505 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10506 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10507 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10508 (0x1.0p84 + double(fp_value_hi_xmm)).
10509 Note these exponents differ by 32. */
10511 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10513 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10514 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10515 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10516 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10517 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10518 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10519 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10520 biases = validize_mem (force_const_mem (V2DFmode, biases));
10521 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10523 /* Add the upper and lower DFmode values together. */
10525 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
/* No SSE3: unpack high half and add the two lanes manually.  */
10528 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10529 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10530 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract element 0 (the sum) into TARGET.  */
10533 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10536 /* Convert an unsigned SImode value into a DFmode. Only currently used
10537 for SSE, but applicable anywhere. */
/* Convert unsigned SImode INPUT to DFmode TARGET: flip the sign bit
   (x - 2**31 as signed), convert signed, then add back 2**31 exactly
   representable in DF.  */
10540 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10542 REAL_VALUE_TYPE TWO31r;
/* x := input + INT_MIN, i.e. input - 2**31 interpreted as signed.  */
10545 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10546 NULL, 1, OPTAB_DIRECT);
10548 fp = gen_reg_rtx (DFmode);
10549 emit_insn (gen_floatsidf2 (fp, x));
/* fp + 2**31 restores the unsigned value, now in DF.  */
10551 real_ldexp (&TWO31r, &dconst1, 31);
10552 x = const_double_from_real_value (TWO31r, DFmode);
10554 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10556 emit_move_insn (target, x);
10559 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10560 32-bit mode; otherwise we have a direct convert instruction. */
/* Convert signed DImode INPUT to DFmode TARGET for 32-bit SSE:
   convert the signed high word, scale by 2**32, convert the unsigned
   low word, and add.  */
10563 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10565 REAL_VALUE_TYPE TWO32r;
10566 rtx fp_lo, fp_hi, x;
10568 fp_lo = gen_reg_rtx (DFmode);
10569 fp_hi = gen_reg_rtx (DFmode);
/* High 32 bits carry the sign; a plain signed convert suffices.  */
10571 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10573 real_ldexp (&TWO32r, &dconst1, 32);
10574 x = const_double_from_real_value (TWO32r, DFmode);
10575 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low 32 bits are unsigned; use the uns_sidf helper.  */
10577 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10579 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10582 emit_move_insn (target, x);
10585 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10586 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Convert unsigned SImode INPUT to SFmode TARGET with SSE only:
   split into 16-bit halves (each exactly representable in SF),
   convert both, and combine as hi * 2**16 + lo.  */
10588 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10590 REAL_VALUE_TYPE ONE16r;
10591 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10593 real_ldexp (&ONE16r, &dconst1, 16);
10594 x = const_double_from_real_value (ONE16r, SFmode);
10595 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10596 NULL, 0, OPTAB_DIRECT);
10597 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10598 NULL, 0, OPTAB_DIRECT);
10599 fp_hi = gen_reg_rtx (SFmode);
10600 fp_lo = gen_reg_rtx (SFmode);
10601 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10602 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10603 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10605 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10607 if (!rtx_equal_p (target, fp_hi))
10608 emit_move_insn (target, fp_hi);
10611 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10612 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR in the vector mode corresponding to MODE
   containing VALUE; if VECT, replicate VALUE into every element,
   otherwise put it only in element 0 with the rest zero.
   NOTE(review): the enclosing switch labels are elided in this
   listing — each pair below handles one element mode.  */
10616 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10623 v = gen_rtvec (4, value, value, value, value);
10624 return gen_rtx_CONST_VECTOR (V4SImode, v);
10628 v = gen_rtvec (2, value, value);
10629 return gen_rtx_CONST_VECTOR (V2DImode, v);
10633 v = gen_rtvec (4, value, value, value, value);
10635 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10636 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10637 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10641 v = gen_rtvec (2, value, value);
10643 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10644 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10647 gcc_unreachable ();
10651 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10652 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10653 for an SSE register. If VECT is true, then replicate the mask for
10654 all elements of the vector register. If INVERT is true, then create
10655 a mask excluding the sign bit. */
/* Build (in a register) a mask with only the sign bit set for MODE,
   replicated across the vector if VECT; if INVERT, build the
   complement (all bits except the sign bit).
   NOTE(review): the switch labels and the `shift` computation are
   elided in this listing.  */
10658 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10660 enum machine_mode vec_mode, imode;
10661 HOST_WIDE_INT hi, lo;
10666 /* Find the sign bit, sign extended to 2*HWI. */
/* 32-bit element: sign bit is 0x80000000.  */
10672 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10673 lo = 0x80000000, hi = lo < 0;
/* 64-bit element: bit may not fit in one HOST_WIDE_INT.  */
10679 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10680 if (HOST_BITS_PER_WIDE_INT >= 64)
10681 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10683 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* Scalar (non-vector) case: no vector mode, force into MODE reg.  */
10689 vec_mode = VOIDmode;
10690 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10691 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10695 gcc_unreachable ();
/* INVERT selects the "everything but the sign bit" mask.  */
10699 lo = ~lo, hi = ~hi;
10701 /* Force this value into the low part of a fp vector constant. */
10702 mask = immed_double_const (lo, hi, imode);
10703 mask = gen_lowpart (mode, mask);
10705 if (vec_mode == VOIDmode)
10706 return force_reg (mode, mask);
10708 v = ix86_build_const_vector (mode, vect, mask);
10709 return force_reg (vec_mode, v);
10712 /* Generate code for floating point ABS or NEG. */
/* Generate code for floating-point ABS or NEG (CODE) on operands[].
   With SSE, ABS/NEG become AND/XOR against a sign-bit mask; the x87
   path emits the plain unary RTX (with a USE/CLOBBER parallel).  */
10715 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10718 rtx mask, set, use, clob, dst, src;
10719 bool matching_memory;
10720 bool use_sse = false;
10721 bool vector_mode = VECTOR_MODE_P (mode);
10722 enum machine_mode elt_mode = mode;
10726 elt_mode = GET_MODE_INNER (mode);
10729 else if (mode == TFmode)
10731 else if (TARGET_SSE_MATH)
10732 use_sse = SSE_FLOAT_MODE_P (mode);
10734 /* NEG and ABS performed with SSE use bitwise mask operations.
10735 Create the appropriate mask now. */
/* ABS clears the sign bit (inverted mask + AND); NEG flips it (XOR).  */
10737 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10744 /* If the destination is memory, and we don't have matching source
10745 operands or we're using the x87, do things in registers. */
10746 matching_memory = false;
10749 if (use_sse && rtx_equal_p (dst, src))
10750 matching_memory = true;
10752 dst = gen_reg_rtx (mode);
10754 if (MEM_P (src) && !matching_memory)
10755 src = force_reg (mode, src);
/* SSE path: NEG == XOR with sign mask, ABS == AND with ~sign mask.  */
10759 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10760 set = gen_rtx_SET (VOIDmode, dst, set);
/* x87 path: plain unary SET plus USE of the mask and a flags clobber.  */
10765 set = gen_rtx_fmt_e (code, mode, src);
10766 set = gen_rtx_SET (VOIDmode, dst, set);
10769 use = gen_rtx_USE (VOIDmode, mask);
10770 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10771 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10772 gen_rtvec (3, set, use, clob)));
10778 if (dst != operands[0])
10779 emit_move_insn (operands[0], dst);
10782 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* Expand copysign(op0, op1) into DEST.  A CONST_DOUBLE magnitude uses
   the cheaper *_const pattern (one mask); a variable magnitude needs
   the *_var pattern with both a mask and its complement.  */
10785 ix86_expand_copysign (rtx operands[])
10787 enum machine_mode mode, vmode;
10788 rtx dest, op0, op1, mask, nmask;
10790 dest = operands[0];
10794 mode = GET_MODE (dest);
10795 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10797 if (GET_CODE (op0) == CONST_DOUBLE)
10799 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Only the magnitude of op0 matters; strip its sign up front.  */
10801 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10802 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10804 if (mode == SFmode || mode == DFmode)
10806 if (op0 == CONST0_RTX (mode))
10807 op0 = CONST0_RTX (vmode)
10812 if (mode == SFmode)
10813 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10814 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10816 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10817 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10821 mask = ix86_build_signbit_mask (mode, 0, 0);
10823 if (mode == SFmode)
10824 copysign_insn = gen_copysignsf3_const;
10825 else if (mode == DFmode)
10826 copysign_insn = gen_copysigndf3_const;
10828 copysign_insn = gen_copysigntf3_const;
10830 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable-magnitude case: needs sign mask and its complement.  */
10834 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
10836 nmask = ix86_build_signbit_mask (mode, 0, 1);
10837 mask = ix86_build_signbit_mask (mode, 0, 0);
10839 if (mode == SFmode)
10840 copysign_insn = gen_copysignsf3_var;
10841 else if (mode == DFmode)
10842 copysign_insn = gen_copysigndf3_var;
10844 copysign_insn = gen_copysigntf3_var;
10846 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
10850 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10851 be a constant, and so has already been expanded into a vector constant. */
/* Split a copysign whose magnitude operand is already a vector
   constant: dest := (dest & sign-mask) | op0.  */
10854 ix86_split_copysign_const (rtx operands[])
10856 enum machine_mode mode, vmode;
10857 rtx dest, op0, op1, mask, x;
10859 dest = operands[0];
10862 mask = operands[3];
10864 mode = GET_MODE (dest);
10865 vmode = GET_MODE (mask);
/* Keep only the sign bit of dest.  */
10867 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10868 x = gen_rtx_AND (vmode, dest, mask);
10869 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* OR in the (nonzero) constant magnitude.  */
10871 if (op0 != CONST0_RTX (vmode))
10873 x = gen_rtx_IOR (vmode, dest, op0);
10874 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10878 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10879 so we have to do two masks. */
/* Split a copysign with a variable magnitude: two mask operations,
   (op0 & ~signmask) | (op1 & signmask), with register-allocation
   alternatives distinguished by which operands share hard registers.  */
10882 ix86_split_copysign_var (rtx operands[])
10884 enum machine_mode mode, vmode;
10885 rtx dest, scratch, op0, op1, mask, nmask, x;
10887 dest = operands[0];
10888 scratch = operands[1];
10891 nmask = operands[4];
10892 mask = operands[5];
10894 mode = GET_MODE (dest);
10895 vmode = GET_MODE (mask);
10897 if (rtx_equal_p (op0, op1))
10899 /* Shouldn't happen often (it's useless, obviously), but when it does
10900 we'd generate incorrect code if we continue below. */
10901 emit_move_insn (dest, op0);
10905 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10907 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch := op1 & mask (sign of op1).  */
10909 x = gen_rtx_AND (vmode, scratch, mask);
10910 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest := ~mask & op0 (magnitude of op0), via NOT of dest==mask.  */
10913 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10914 x = gen_rtx_NOT (vmode, dest);
10915 x = gen_rtx_AND (vmode, x, op0);
10916 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10920 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10922 x = gen_rtx_AND (vmode, scratch, mask);
10924 else /* alternative 2,4 */
10926 gcc_assert (REGNO (mask) == REGNO (scratch));
10927 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10928 x = gen_rtx_AND (vmode, scratch, op1);
10930 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10932 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10934 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10935 x = gen_rtx_AND (vmode, dest, nmask);
10937 else /* alternative 3,4 */
10939 gcc_assert (REGNO (nmask) == REGNO (dest));
10941 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10942 x = gen_rtx_AND (vmode, dest, op0);
10944 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine the sign half and the magnitude half.  */
10947 x = gen_rtx_IOR (vmode, dest, scratch);
10948 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10951 /* Return TRUE or FALSE depending on whether the first SET in INSN
10952 has source and destination with matching CC modes, and that the
10953 CC mode is at least as constrained as REQ_MODE. */
/* Check that INSN's first SET is a COMPARE whose CC destination mode
   is at least as constrained as REQ_MODE; returns whether the source
   and destination CC modes match.  NOTE(review): the switch labels
   over set_mode are elided in this listing.  */
10956 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10959 enum machine_mode set_mode;
10961 set = PATTERN (insn);
10962 if (GET_CODE (set) == PARALLEL)
10963 set = XVECEXP (set, 0, 0);
10964 gcc_assert (GET_CODE (set) == SET);
10965 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10967 set_mode = GET_MODE (SET_DEST (set));
10971 if (req_mode != CCNOmode
10972 && (req_mode != CCmode
10973 || XEXP (SET_SRC (set), 1) != const0_rtx))
10977 if (req_mode == CCGCmode)
10981 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10985 if (req_mode == CCZmode)
10992 gcc_unreachable ();
10995 return (GET_MODE (SET_SRC (set)) == set_mode);
10998 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emit an integer compare of OP0 and OP1 into the flags register and
   return the (CODE flags 0) RTX for the flags consumer (bcc/scc/cmov).  */
11001 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11003 enum machine_mode cmpmode;
11006 cmpmode = SELECT_CC_MODE (code, op0, op1);
11007 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11009 /* This is very simple, but making the interface the same as in the
11010 FP case makes the rest of the code easier. */
11011 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11012 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11014 /* Return the test that should be put into the flags user, i.e.
11015 the bcc, scc, or cmov instruction. */
11016 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11019 /* Figure out whether to use ordered or unordered fp comparisons.
11020 Return the appropriate mode to use. */
/* Choose the CC mode for FP comparisons: unordered (non-trapping)
   when honoring IEEE, ordered otherwise.  */
11023 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11025 /* ??? In order to make all comparisons reversible, we do all comparisons
11026 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11027 all forms trapping and nontrapping comparisons, we can make inequality
11028 comparisons trapping again, since it results in better code when using
11029 FCOM based compares. */
11030 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the narrowest CC mode able to represent comparison CODE of
   OP0 and OP1, so more flag-setting insn variants can satisfy it.
   NOTE(review): the returned mode on each case is elided in this
   listing; only the case labels and guards are visible.  */
11034 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11036 enum machine_mode mode = GET_MODE (op0);
11038 if (SCALAR_FLOAT_MODE_P (mode))
11040 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11041 return ix86_fp_compare_mode (code);
11046 /* Only zero flag is needed. */
11047 case EQ: /* ZF=0 */
11048 case NE: /* ZF!=0 */
11050 /* Codes needing carry flag. */
11051 case GEU: /* CF=0 */
11052 case LTU: /* CF=1 */
11053 /* Detect overflow checks. They need just the carry flag. */
11054 if (GET_CODE (op0) == PLUS
11055 && rtx_equal_p (op1, XEXP (op0, 0)))
11059 case GTU: /* CF=0 & ZF=0 */
11060 case LEU: /* CF=1 | ZF=1 */
11061 /* Detect overflow checks. They need just the carry flag. */
11062 if (GET_CODE (op0) == MINUS
11063 && rtx_equal_p (op1, XEXP (op0, 0)))
11067 /* Codes possibly doable only with sign flag when
11068 comparing against zero. */
11069 case GE: /* SF=OF or SF=0 */
11070 case LT: /* SF<>OF or SF=1 */
11071 if (op1 == const0_rtx)
11074 /* For other cases Carry flag is not required. */
11076 /* Codes doable only with sign flag when comparing
11077 against zero, but we miss jump instruction for it
11078 so we need to use relational tests against overflow
11079 that thus needs to be zero. */
11080 case GT: /* ZF=0 & SF=OF */
11081 case LE: /* ZF=1 | SF<>OF */
11082 if (op1 == const0_rtx)
11086 /* strcmp pattern do (use flags) and combine may ask us for proper
11091 gcc_unreachable ();
11095 /* Return the fixed registers used for condition codes. */
/* Report the fixed condition-code registers via *P1/*P2.
   NOTE(review): the function body is elided in this listing.  */
11098 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11105 /* If two condition code modes are compatible, return a condition code
11106 mode which is compatible with both. Otherwise, return
/* Combine two CC modes into one compatible with both; CCGC/CCGOC merge
   to the stricter CCGCmode.  NOTE(review): the return statements and
   remaining switch cases are elided in this listing.  */
static enum machine_mode
11110 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11115 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11118 if ((m1 == CCGCmode && m2 == CCGOCmode)
11119 || (m1 == CCGOCmode && m2 == CCGCmode))
11125 gcc_unreachable ();
11155 /* These are only compatible with themselves, which we already
11161 /* Split comparison code CODE into comparisons we can do using branch
11162 instructions. BYPASS_CODE is comparison code for branch that will
11163 branch around FIRST_CODE and SECOND_CODE. If some of branches
11164 is not required, set value to UNKNOWN.
11165 We never require more than two branches. */
/* Split FP comparison CODE into up to two branchable comparisons
   (*FIRST_CODE, *SECOND_CODE) plus an optional *BYPASS_CODE that
   branches around them; UNKNOWN marks an unused slot.  Non-IEEE
   targets drop the extra branches.  */
11168 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11169 enum rtx_code *first_code,
11170 enum rtx_code *second_code)
11172 *first_code = code;
11173 *bypass_code = UNKNOWN;
11174 *second_code = UNKNOWN;
11176 /* The fcomi comparison sets flags as follows:
/* Codes that map directly onto one fcomi-style test.  */
11186 case GT: /* GTU - CF=0 & ZF=0 */
11187 case GE: /* GEU - CF=0 */
11188 case ORDERED: /* PF=0 */
11189 case UNORDERED: /* PF=1 */
11190 case UNEQ: /* EQ - ZF=1 */
11191 case UNLT: /* LTU - CF=1 */
11192 case UNLE: /* LEU - CF=1 | ZF=1 */
11193 case LTGT: /* EQ - ZF=0 */
/* Codes that fail on unordered: use the UN* form plus a bypass or a
   second UNORDERED test.  */
11195 case LT: /* LTU - CF=1 - fails on unordered */
11196 *first_code = UNLT;
11197 *bypass_code = UNORDERED;
11199 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11200 *first_code = UNLE;
11201 *bypass_code = UNORDERED;
11203 case EQ: /* EQ - ZF=1 - fails on unordered */
11204 *first_code = UNEQ;
11205 *bypass_code = UNORDERED;
11207 case NE: /* NE - ZF=0 - fails on unordered */
11208 *first_code = LTGT;
11209 *second_code = UNORDERED;
11211 case UNGE: /* GEU - CF=0 - fails on unordered */
11213 *second_code = UNORDERED;
11215 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11217 *second_code = UNORDERED;
11220 gcc_unreachable ();
/* Without IEEE conformance the unordered fix-ups are unnecessary.  */
11222 if (!TARGET_IEEE_FP)
11224 *second_code = UNKNOWN;
11225 *bypass_code = UNKNOWN;
/* NOTE(review): the per-code cost table (lines 11239..11261 of the full
   source) is missing from this extraction.  */
11229 /* Return cost of comparison done fcom + arithmetics operations on AX.
11230 All following functions do use number of instructions as a cost metrics.
11231 In future this should be tweaked to compute bytes for optimize_size and
11232 take into account performance of various instructions on various CPUs. */
11234 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Non-IEEE mode presumably short-circuits to a cheap constant cost --
   the returned value is not visible here.  */
11236 if (!TARGET_IEEE_FP)
11238 /* The cost of code output by ix86_expand_fp_compare. */
11262 gcc_unreachable ();
11266 /* Return cost of comparison done using fcomi operation.
11267 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11269 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11271 enum rtx_code bypass_code, first_code, second_code;
/* NOTE(review): the TARGET_CMOVE guard that returns the "arbitrarily
   high" cost is missing from this extraction.  */
11272 /* Return arbitrarily high cost when instruction is not supported - this
11273 prevents gcc from using it. */
11276 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 2 (fcomi + jump), plus 1 if an extra bypass or second
   branch is needed.  */
11277 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11280 /* Return cost of comparison done using sahf operation.
11281 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11283 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11285 enum rtx_code bypass_code, first_code, second_code;
11286 /* Return arbitrarily high cost when instruction is not preferred - this
11287 avoids gcc from using it. */
/* Require both availability (TARGET_SAHF) and a tuning preference for
   it, unless we are optimizing for size; the high-cost return value is
   missing from this extraction.  */
11288 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11290 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 3 (fnstsw + sahf + jump), plus 1 for any extra branch.  */
11291 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11294 /* Compute cost of the comparison done using any method.
11295 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11297 ix86_fp_comparison_cost (enum rtx_code code)
11299 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
/* Take the minimum of the three strategies (fcomi, fnstsw+sahf, and
   fcom + integer arithmetic on AX); the updates to `min' and the final
   return are partially missing from this extraction.  */
11302 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11303 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11305 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11306 if (min > sahf_cost)
11308 if (min > fcomi_cost)
11313 /* Return true if we should use an FCOMI instruction for this
11317 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11319 enum rtx_code swapped_code = swap_condition (code);
/* fcomi wins if it is the cheapest strategy for either operand order;
   ix86_prepare_fp_compare_args may later swap the operands, so both
   orientations are checked here.  */
11321 return ((ix86_fp_comparison_cost (code)
11322 == ix86_fp_comparison_fcomi_cost (code))
11323 || (ix86_fp_comparison_cost (swapped_code)
11324 == ix86_fp_comparison_fcomi_cost (swapped_code)));
/* NOTE(review): several condition lines and the SSE branch of this
   function are missing from this extraction; the surviving lines below
   are not a complete body.  */
11327 /* Swap, force into registers, or otherwise massage the two operands
11328 to a fp comparison. The operands are updated in place; the new
11329 comparison code is returned. */
11331 static enum rtx_code
11332 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11334 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11335 rtx op0 = *pop0, op1 = *pop1;
11336 enum machine_mode op_mode = GET_MODE (op0);
11337 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11339 /* All of the unordered compare instructions only work on registers.
11340 The same is true of the fcomi compare instructions. The XFmode
11341 compare instructions require registers except when comparing
11342 against zero or when converting operand 1 from fixed point to
/* Force both operands into registers for unordered (CCFPUmode), most
   XFmode, and fcomi comparisons.  */
11346 && (fpcmp_mode == CCFPUmode
11347 || (op_mode == XFmode
11348 && ! (standard_80387_constant_p (op0) == 1
11349 || standard_80387_constant_p (op1) == 1)
11350 && GET_CODE (op1) != FLOAT)
11351 || ix86_use_fcomi_compare (code)))
11353 op0 = force_reg (op_mode, op0);
11354 op1 = force_reg (op_mode, op1);
11358 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11359 things around if they appear profitable, otherwise force op0
11360 into a register. */
11362 if (standard_80387_constant_p (op0) == 0
11364 && ! (standard_80387_constant_p (op1) == 0
/* Swapping operands requires swapping the comparison code too.  */
11368 tmp = op0, op0 = op1, op1 = tmp;
11369 code = swap_condition (code);
11373 op0 = force_reg (op_mode, op0);
/* Constants either become memory operands (if not a standard 80387
   constant like 0.0/1.0) or get loaded into a register.  */
11375 if (CONSTANT_P (op1))
11377 int tmp = standard_80387_constant_p (op1);
11379 op1 = validize_mem (force_const_mem (op_mode, op1));
11383 op1 = force_reg (op_mode, op1);
11386 op1 = force_reg (op_mode, op1);
11390 /* Try to rearrange the comparison to make it cheaper. */
11391 if (ix86_fp_comparison_cost (code)
11392 > ix86_fp_comparison_cost (swap_condition (code))
11393 && (REG_P (op1) || can_create_pseudo_p ()))
11396 tmp = op0, op0 = op1, op1 = tmp;
11397 code = swap_condition (code);
11399 op0 = force_reg (op_mode, op0);
/* NOTE(review): the entire switch body of this function (mapping e.g.
   GT->GTU, UNEQ->EQ after fcomi/sahf) is missing from this extraction.  */
11407 /* Convert comparison codes we use to represent FP comparison to integer
11408 code that will result in proper branch. Return UNKNOWN if no such code
11412 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* NOTE(review): many control-flow lines (if/else scaffolding, switch on
   CODE, break statements) are missing from this extraction; the bit-test
   sequences below belong to the fnstsw/arithmetic fallback path.  */
11441 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11444 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11445 rtx *second_test, rtx *bypass_test)
11447 enum machine_mode fpcmp_mode, intcmp_mode;
/* Cost is computed before operand massaging so the fcomi/sahf-vs-
   arithmetic decision below uses the original orientation.  */
11449 int cost = ix86_fp_comparison_cost (code);
11450 enum rtx_code bypass_code, first_code, second_code;
11452 fpcmp_mode = ix86_fp_compare_mode (code);
11453 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11456 *second_test = NULL_RTX;
11458 *bypass_test = NULL_RTX;
11460 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11462 /* Do fcomi/sahf based test when profitable. */
/* Only usable when the caller supplied slots for any extra tests the
   split requires.  */
11463 if ((TARGET_CMOVE || TARGET_SAHF)
11464 && (bypass_code == UNKNOWN || bypass_test)
11465 && (second_code == UNKNOWN || second_test)
11466 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: compare straight into the flags register.  */
11470 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11471 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a scratch HImode reg, then sahf to move
   AH into EFLAGS.  */
11477 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11478 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11480 scratch = gen_reg_rtx (HImode);
11481 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11482 emit_insn (gen_x86_sahf_1 (scratch));
11485 /* The FP codes work out to act like unsigned. */
11486 intcmp_mode = fpcmp_mode;
11488 if (bypass_code != UNKNOWN)
11489 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11490 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11492 if (second_code != UNKNOWN)
11493 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11494 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11499 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11500 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11501 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11503 scratch = gen_reg_rtx (HImode);
11504 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11506 /* In the unordered case, we have to check C2 for NaN's, which
11507 doesn't happen to work out to anything nice combination-wise.
11508 So do some bit twiddling on the value we've got in AH to come
11509 up with an appropriate set of condition codes. */
11511 intcmp_mode = CCNOmode;
/* The masks below select FPU status bits from AH: 0x45 = C0|C2|C3,
   0x40 = C3 (ZF analogue), 0x05 = C0|C2, 0x01 = C0 (CF analogue),
   0x04 = C2 (set on unordered).  */
11516 if (code == GT || !TARGET_IEEE_FP)
11518 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11523 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11524 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11525 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11526 intcmp_mode = CCmode;
11532 if (code == LT && TARGET_IEEE_FP)
11534 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11535 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11536 intcmp_mode = CCmode;
11541 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11547 if (code == GE || !TARGET_IEEE_FP)
11549 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11554 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11555 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11562 if (code == LE && TARGET_IEEE_FP)
11564 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11565 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11566 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11567 intcmp_mode = CCmode;
11572 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11578 if (code == EQ && TARGET_IEEE_FP)
11580 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11581 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11582 intcmp_mode = CCmode;
11587 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11594 if (code == NE && TARGET_IEEE_FP)
11596 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11597 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11603 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED/UNORDERED: test C2 alone.  */
11609 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11613 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11618 gcc_unreachable ();
11622 /* Return the test that should be put into the flags user, i.e.
11623 the bcc, scc, or cmov instruction. */
11624 return gen_rtx_fmt_ee (code, VOIDmode,
11625 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison described by the global ix86_compare_op0/op1
   (or a previously emitted compare) into an rtx suitable for a flags
   user.  NOTE(review): declarations, the header comment, and the final
   return are missing from this extraction.  */
11630 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11633 op0 = ix86_compare_op0;
11634 op1 = ix86_compare_op1;
11637 *second_test = NULL_RTX;
11639 *bypass_test = NULL_RTX;
/* A compare insn already emitted: just wrap its result; the global is
   consumed (reset to NULL) so it is used at most once.  */
11641 if (ix86_compare_emitted)
11643 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11644 ix86_compare_emitted = NULL_RTX;
11646 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
/* Decimal float comparisons are expanded elsewhere; they must not
   reach this path.  */
11648 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11649 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11650 second_test, bypass_test);
11653 ret = ix86_expand_int_compare (code, op0, op1);
11658 /* Return true if the CODE will result in nontrivial jump sequence. */
11660 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11662 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial means the split needs more than the single primary branch
   (an extra bypass or second branch).  NOTE(review): an early-return
   guard between these lines is missing from this extraction.  */
11665 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11666 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL for comparison CODE of the global
   compare operands.  Dispatches on the operand mode: integer modes use a
   simple compare+jcc, FP modes may need a multi-branch split, and
   DImode/TImode (double-word) is decomposed into word compares.
   NOTE(review): the switch scaffolding and several case labels are
   missing from this extraction.  */
11670 ix86_expand_branch (enum rtx_code code, rtx label)
11674 /* If we have emitted a compare insn, go straight to simple.
11675 ix86_expand_compare won't emit anything if ix86_compare_emitted
11677 if (ix86_compare_emitted)
11680 switch (GET_MODE (ix86_compare_op0))
/* Simple case: one compare, one conditional jump.  */
11686 tmp = ix86_expand_compare (code, NULL, NULL);
11687 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11688 gen_rtx_LABEL_REF (VOIDmode, label),
11690 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11699 enum rtx_code bypass_code, first_code, second_code;
11701 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11702 &ix86_compare_op1);
11704 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11706 /* Check whether we will use the natural sequence with one jump. If
11707 so, we can expand jump early. Otherwise delay expansion by
11708 creating compound insn to not confuse optimizers. */
11709 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11712 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11713 gen_rtx_LABEL_REF (VOIDmode, label),
11714 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-branch FP case: emit a compound PARALLEL (jump + clobbers of
   the two FP CC registers and, without fcomi, a HImode scratch) to be
   split after optimization.  */
11718 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11719 ix86_compare_op0, ix86_compare_op1);
11720 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11721 gen_rtx_LABEL_REF (VOIDmode, label),
11723 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11725 use_fcomi = ix86_use_fcomi_compare (code);
11726 vec = rtvec_alloc (3 + !use_fcomi);
11727 RTVEC_ELT (vec, 0) = tmp;
/* Hard regs 18 and 17 here are the FP status CC registers -- TODO
   confirm the register numbering against the full i386.h.  */
11729 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11731 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11734 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11736 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11745 /* Expand DImode branch into multiple compare+branch. */
11747 rtx lo[2], hi[2], label2;
11748 enum rtx_code code1, code2, code3;
11749 enum machine_mode submode;
/* Canonicalize: constant (if any) goes in op1.  */
11751 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11753 tmp = ix86_compare_op0;
11754 ix86_compare_op0 = ix86_compare_op1;
11755 ix86_compare_op1 = tmp;
11756 code = swap_condition (code);
/* Split each double-word operand into low/high halves (DImode on
   32-bit, TImode on 64-bit).  */
11758 if (GET_MODE (ix86_compare_op0) == DImode)
11760 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11761 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11766 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11767 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11771 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11772 avoid two branches. This costs one extra insn, so disable when
11773 optimizing for size. */
11775 if ((code == EQ || code == NE)
11777 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR against a zero half is skipped -- the half itself already
   carries the needed value.  */
11782 if (hi[1] != const0_rtx)
11783 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11784 NULL_RTX, 0, OPTAB_WIDEN);
11787 if (lo[1] != const0_rtx)
11788 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11789 NULL_RTX, 0, OPTAB_WIDEN);
11791 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11792 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse with the single-word OR-result compared against zero.  */
11794 ix86_compare_op0 = tmp;
11795 ix86_compare_op1 = const0_rtx;
11796 ix86_expand_branch (code, label);
11800 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11801 op1 is a constant and the low word is zero, then we can just
11802 examine the high word. */
11804 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11807 case LT: case LTU: case GE: case GEU:
11808 ix86_compare_op0 = hi[0];
11809 ix86_compare_op1 = hi[1];
11810 ix86_expand_branch (code, label);
11816 /* Otherwise, we need two or three jumps. */
11818 label2 = gen_label_rtx ();
11821 code2 = swap_condition (code);
11822 code3 = unsigned_condition (code);
/* Derive the per-word branch codes; the high-word compares keep the
   original signedness, the low-word compare is always unsigned.  */
11826 case LT: case GT: case LTU: case GTU:
11829 case LE: code1 = LT; code2 = GT; break;
11830 case GE: code1 = GT; code2 = LT; break;
11831 case LEU: code1 = LTU; code2 = GTU; break;
11832 case GEU: code1 = GTU; code2 = LTU; break;
11834 case EQ: code1 = UNKNOWN; code2 = NE; break;
11835 case NE: code2 = UNKNOWN; break;
11838 gcc_unreachable ();
11843 * if (hi(a) < hi(b)) goto true;
11844 * if (hi(a) > hi(b)) goto false;
11845 * if (lo(a) < lo(b)) goto true;
11849 ix86_compare_op0 = hi[0];
11850 ix86_compare_op1 = hi[1];
11852 if (code1 != UNKNOWN)
11853 ix86_expand_branch (code1, label);
11854 if (code2 != UNKNOWN)
11855 ix86_expand_branch (code2, label2);
11857 ix86_compare_op0 = lo[0];
11858 ix86_compare_op1 = lo[1];
11859 ix86_expand_branch (code3, label);
11861 if (code2 != UNKNOWN)
11862 emit_label (label2);
11867 gcc_unreachable ();
11871 /* Split branch based on floating point condition. */
11873 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11874 rtx target1, rtx target2, rtx tmp, rtx pushed)
11876 rtx second, bypass;
11877 rtx label = NULL_RTX;
11879 int bypass_probability = -1, second_probability = -1, probability = -1;
/* If the fallthrough target is not pc, normalize by reversing the
   condition so target2 becomes the branch target (maybe_unordered
   because FP compares may be unordered).  */
11882 if (target2 != pc_rtx)
11885 code = reverse_condition_maybe_unordered (code);
11890 condition = ix86_expand_fp_compare (code, op1, op2,
11891 tmp, &second, &bypass);
11893 /* Remove pushed operand from stack. */
11895 ix86_free_from_memory (GET_MODE (pushed));
11897 if (split_branch_probability >= 0)
11899 /* Distribute the probabilities across the jumps.
11900 Assume the BYPASS and SECOND to be always test
11902 probability = split_branch_probability;
11904 /* Value of 1 is low enough to make no need for probability
11905 to be updated. Later we may run some experiments and see
11906 if unordered values are more frequent in practice. */
11908 bypass_probability = 1;
11910 second_probability = 1;
/* Up to three jumps are emitted: bypass (around the main test on
   unordered), the main conditional jump, and the second test; each
   gets a REG_BR_PROB note when a probability is known.  */
11912 if (bypass != NULL_RTX)
11914 label = gen_label_rtx ();
11915 i = emit_jump_insn (gen_rtx_SET
11917 gen_rtx_IF_THEN_ELSE (VOIDmode,
11919 gen_rtx_LABEL_REF (VOIDmode,
11922 if (bypass_probability >= 0)
11924 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11925 GEN_INT (bypass_probability),
11928 i = emit_jump_insn (gen_rtx_SET
11930 gen_rtx_IF_THEN_ELSE (VOIDmode,
11931 condition, target1, target2)));
11932 if (probability >= 0)
11934 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11935 GEN_INT (probability),
11937 if (second != NULL_RTX)
11939 i = emit_jump_insn (gen_rtx_SET
11941 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11943 if (second_probability >= 0)
11945 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11946 GEN_INT (second_probability),
/* Land the bypass here, past the main and second jumps.  */
11949 if (label != NULL_RTX)
11950 emit_label (label);
/* Expand a setcc of comparison CODE into QImode register DEST.
   Returns 1 on success (DONE), 0 on failure (FAIL) so the caller's
   expander pattern can fall back.  NOTE(review): declarations of
   tmp/tmpreg setup lines are missing from this extraction.  */
11954 ix86_expand_setcc (enum rtx_code code, rtx dest)
11956 rtx ret, tmp, tmpreg, equiv;
11957 rtx second_test, bypass_test;
/* Double-word compares go through ix86_expand_branch paths instead.  */
11959 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11960 return 0; /* FAIL */
11962 gcc_assert (GET_MODE (dest) == QImode);
11964 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11965 PUT_MODE (ret, QImode);
11970 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* FP compares may need a second setcc combined in: AND for a bypass
   test (reversed), OR for a second test.  */
11971 if (bypass_test || second_test)
11973 rtx test = second_test;
11975 rtx tmp2 = gen_reg_rtx (QImode);
11978 gcc_assert (!second_test);
11979 test = bypass_test;
/* The bypass test is negated so it can be ANDed with the main
   result.  */
11981 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11983 PUT_MODE (test, QImode);
11984 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11987 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11989 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11992 /* Attach a REG_EQUAL note describing the comparison result. */
11993 if (ix86_compare_op0 && ix86_compare_op1)
11995 equiv = simplify_gen_relational (code, QImode,
11996 GET_MODE (ix86_compare_op0),
11997 ix86_compare_op0, ix86_compare_op1);
11998 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12001 return 1; /* DONE */
12004 /* Expand comparison setting or clearing carry flag. Return true when
12005 successful and set pop for the operation. */
12007 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12009 enum machine_mode mode =
12010 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12012 /* Do not handle DImode compares that go through special path.
12013 Also we can't deal with FP compares yet. This is possible to add. */
12014 if (mode == (TARGET_64BIT ? TImode : DImode))
12017 if (SCALAR_FLOAT_MODE_P (mode))
12019 rtx second_test = NULL, bypass_test = NULL;
12020 rtx compare_op, compare_seq;
12022 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12024 /* Shortcut: following common codes never translate
12025 into carry flag compares. */
12026 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12027 || code == ORDERED || code == UNORDERED)
12030 /* These comparisons require zero flag; swap operands so they won't. */
12031 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12032 && !TARGET_IEEE_FP)
12037 code = swap_condition (code);
12040 /* Try to expand the comparison and verify that we end up with carry flag
12041 based comparison. This is fails to be true only when we decide to expand
12042 comparison using arithmetic that is not too common scenario. */
/* Emit into a sequence first so we can discard it if the result is
   not a pure carry-flag (LTU/GEU) compare.  */
12044 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12045 &second_test, &bypass_test);
12046 compare_seq = get_insns ();
12049 if (second_test || bypass_test)
12051 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12052 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12053 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12055 code = GET_CODE (compare_op);
12056 if (code != LTU && code != GEU)
12058 emit_insn (compare_seq);
12062 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/op1 so the test becomes an unsigned
   LTU/GEU, which maps directly onto the carry flag.  */
12070 /* Convert a==0 into (unsigned)a<1. */
12073 if (op1 != const0_rtx)
12076 code = (code == EQ ? LTU : GEU);
12079 /* Convert a>b into b<a or a>=b-1. */
12082 if (CONST_INT_P (op1))
12084 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12085 /* Bail out on overflow. We still can swap operands but that
12086 would force loading of the constant into register. */
12087 if (op1 == const0_rtx
12088 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12090 code = (code == GTU ? GEU : LTU);
12097 code = (code == GTU ? LTU : GEU);
12101 /* Convert a>=0 into (unsigned)a<0x80000000. */
12104 if (mode == DImode || op1 != const0_rtx)
12106 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12107 code = (code == LT ? GEU : LTU);
12111 if (mode == DImode || op1 != constm1_rtx)
12113 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12114 code = (code == LE ? GEU : LTU);
12120 /* Swapping operands may cause constant to appear as first operand. */
12121 if (!nonimmediate_operand (op0, VOIDmode))
12123 if (!can_create_pseudo_p ())
12125 op0 = force_reg (mode, op0);
12127 ix86_compare_op0 = op0;
12128 ix86_compare_op1 = op1;
12129 *pop = ix86_expand_compare (code, NULL, NULL);
/* The rewrites above must have produced a carry-flag test.  */
12130 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move (operands[0] = operands[1] ?
   operands[2] : operands[3]).  Returns 1 (DONE) or 0 (FAIL) for the
   expander.  Tries, in order: carry-flag/sbb tricks for constant arms,
   sign-bit shifts, lea-based diff encodings, setcc+arithmetic, masking
   with a recursive call, and finally real cmov.
   NOTE(review): this extraction drops many scaffolding lines (braces,
   else-arms, some conditions); comments below only annotate what is
   visible.  Also note line 12141 ends in a stray double semicolon
   ("= false;;") -- harmless but worth cleaning up in the full source,
   and line 12574 tests operands[3] twice where the parallel branch at
   12564 tests operands[2]/operands[3]; compare with upstream.  */
12135 ix86_expand_int_movcc (rtx operands[])
12137 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12138 rtx compare_seq, compare_op;
12139 rtx second_test, bypass_test;
12140 enum machine_mode mode = GET_MODE (operands[0]);
12141 bool sign_bit_compare_p = false;;
/* Expand the compare into a held sequence so it can be re-emitted
   later, after we decide on a strategy.  */
12144 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12145 compare_seq = get_insns ();
12148 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 (and the constm1 variants) can be materialized with an
   arithmetic right shift of the sign bit.  */
12150 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12151 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12152 sign_bit_compare_p = true;
12154 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12155 HImode insns, we'd be swallowed in word prefix ops. */
12157 if ((mode != HImode || TARGET_FAST_PREFIX)
12158 && (mode != (TARGET_64BIT ? TImode : DImode))
12159 && CONST_INT_P (operands[2])
12160 && CONST_INT_P (operands[3]))
12162 rtx out = operands[0];
12163 HOST_WIDE_INT ct = INTVAL (operands[2]);
12164 HOST_WIDE_INT cf = INTVAL (operands[3]);
12165 HOST_WIDE_INT diff;
12168 /* Sign bit compares are better done using shifts than we do by using
12170 if (sign_bit_compare_p
12171 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12172 ix86_compare_op1, &compare_op))
12174 /* Detect overlap between destination and compare sources. */
12177 if (!sign_bit_compare_p)
12179 bool fpcmp = false;
12181 compare_code = GET_CODE (compare_op);
12183 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12184 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12187 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12190 /* To simplify rest of code, restrict to the GEU case. */
12191 if (compare_code == LTU)
12193 HOST_WIDE_INT tmp = ct;
12196 compare_code = reverse_condition (compare_code);
12197 code = reverse_condition (code);
12202 PUT_CODE (compare_op,
12203 reverse_condition_maybe_unordered
12204 (GET_CODE (compare_op)));
12206 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12210 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12211 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12212 tmp = gen_reg_rtx (mode);
/* sbb-style: materialize 0/-1 from the carry flag.  */
12214 if (mode == DImode)
12215 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12217 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12221 if (code == GT || code == GE)
12222 code = reverse_condition (code);
12225 HOST_WIDE_INT tmp = ct;
/* emit_store_flag with last arg -1 yields 0/-1 directly.  */
12230 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12231 ix86_compare_op1, VOIDmode, 0, -1);
/* The arithmetic below maps the 0/-1 mask onto {ct, cf} using
   add/or/not/and depending on the constants.  */
12244 tmp = expand_simple_binop (mode, PLUS,
12246 copy_rtx (tmp), 1, OPTAB_DIRECT);
12257 tmp = expand_simple_binop (mode, IOR,
12259 copy_rtx (tmp), 1, OPTAB_DIRECT);
12261 else if (diff == -1 && ct)
12271 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12273 tmp = expand_simple_binop (mode, PLUS,
12274 copy_rtx (tmp), GEN_INT (cf),
12275 copy_rtx (tmp), 1, OPTAB_DIRECT);
12283 * andl cf - ct, dest
12293 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12296 tmp = expand_simple_binop (mode, AND,
12298 gen_int_mode (cf - ct, mode),
12299 copy_rtx (tmp), 1, OPTAB_DIRECT);
12301 tmp = expand_simple_binop (mode, PLUS,
12302 copy_rtx (tmp), GEN_INT (ct),
12303 copy_rtx (tmp), 1, OPTAB_DIRECT);
12306 if (!rtx_equal_p (tmp, out))
12307 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12309 return 1; /* DONE */
12314 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
/* Swap the constant arms when reversing the condition.  */
12317 tmp = ct, ct = cf, cf = tmp;
12320 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12322 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12324 /* We may be reversing unordered compare to normal compare, that
12325 is not valid in general (we may convert non-trapping condition
12326 to trapping one), however on i386 we currently emit all
12327 comparisons unordered. */
12328 compare_code = reverse_condition_maybe_unordered (compare_code);
12329 code = reverse_condition_maybe_unordered (code);
12333 compare_code = reverse_condition (compare_code);
12334 code = reverse_condition (code);
12338 compare_code = UNKNOWN;
12339 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12340 && CONST_INT_P (ix86_compare_op1))
12342 if (ix86_compare_op1 == const0_rtx
12343 && (code == LT || code == GE))
12344 compare_code = code;
12345 else if (ix86_compare_op1 == constm1_rtx)
12349 else if (code == GT)
12354 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12355 if (compare_code != UNKNOWN
12356 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12357 && (cf == -1 || ct == -1))
12359 /* If lea code below could be used, only optimize
12360 if it results in a 2 insn sequence. */
12362 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12363 || diff == 3 || diff == 5 || diff == 9)
12364 || (compare_code == LT && ct == -1)
12365 || (compare_code == GE && cf == -1))
12368 * notl op1 (if necessary)
12376 code = reverse_condition (code);
12379 out = emit_store_flag (out, code, ix86_compare_op0,
12380 ix86_compare_op1, VOIDmode, 0, -1);
12382 out = expand_simple_binop (mode, IOR,
12384 out, 1, OPTAB_DIRECT);
12385 if (out != operands[0])
12386 emit_move_insn (operands[0], out);
12388 return 1; /* DONE */
/* lea-encodable differences: setcc to 0/1, then scale/offset with lea
   (diff in {1,2,3,4,5,8,9} matches lea's scale+base forms).  */
12393 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12394 || diff == 3 || diff == 5 || diff == 9)
12395 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12397 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12403 * lea cf(dest*(ct-cf)),dest
12407 * This also catches the degenerate setcc-only case.
12413 out = emit_store_flag (out, code, ix86_compare_op0,
12414 ix86_compare_op1, VOIDmode, 0, 1);
12417 /* On x86_64 the lea instruction operates on Pmode, so we need
12418 to get arithmetics done in proper mode to match. */
12420 tmp = copy_rtx (out);
12424 out1 = copy_rtx (out);
12425 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12429 tmp = gen_rtx_PLUS (mode, tmp, out1);
12435 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12438 if (!rtx_equal_p (tmp, out))
12441 out = force_operand (tmp, copy_rtx (out));
12443 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12445 if (!rtx_equal_p (out, operands[0]))
12446 emit_move_insn (operands[0], copy_rtx (out));
12448 return 1; /* DONE */
12452 * General case: Jumpful:
12453 * xorl dest,dest cmpl op1, op2
12454 * cmpl op1, op2 movl ct, dest
12455 * setcc dest jcc 1f
12456 * decl dest movl cf, dest
12457 * andl (cf-ct),dest 1:
12460 * Size 20. Size 14.
12462 * This is reasonably steep, but branch mispredict costs are
12463 * high on modern cpus, so consider failing only if optimizing
12467 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12468 && BRANCH_COST >= 2)
12472 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12477 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12479 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12481 /* We may be reversing unordered compare to normal compare,
12482 that is not valid in general (we may convert non-trapping
12483 condition to trapping one), however on i386 we currently
12484 emit all comparisons unordered. */
12485 code = reverse_condition_maybe_unordered (code);
12489 code = reverse_condition (code);
12490 if (compare_code != UNKNOWN)
12491 compare_code = reverse_condition (compare_code);
12495 if (compare_code != UNKNOWN)
12497 /* notl op1 (if needed)
12502 For x < 0 (resp. x <= -1) there will be no notl,
12503 so if possible swap the constants to get rid of the
12505 True/false will be -1/0 while code below (store flag
12506 followed by decrement) is 0/-1, so the constants need
12507 to be exchanged once more. */
12509 if (compare_code == GE || !cf)
12511 code = reverse_condition (code);
12516 HOST_WIDE_INT tmp = cf;
12521 out = emit_store_flag (out, code, ix86_compare_op0,
12522 ix86_compare_op1, VOIDmode, 0, -1);
12526 out = emit_store_flag (out, code, ix86_compare_op0,
12527 ix86_compare_op1, VOIDmode, 0, 1);
/* Turn the 0/1 setcc result into 0/-1 (decrement), then mask and
   offset onto {ct, cf}.  */
12529 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12530 copy_rtx (out), 1, OPTAB_DIRECT);
12533 out = expand_simple_binop (mode, AND, copy_rtx (out),
12534 gen_int_mode (cf - ct, mode),
12535 copy_rtx (out), 1, OPTAB_DIRECT);
12537 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12538 copy_rtx (out), 1, OPTAB_DIRECT);
12539 if (!rtx_equal_p (out, operands[0]))
12540 emit_move_insn (operands[0], copy_rtx (out));
12542 return 1; /* DONE */
12546 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12548 /* Try a few things more with specific constants and a variable. */
12551 rtx var, orig_out, out, tmp;
12553 if (BRANCH_COST <= 2)
12554 return 0; /* FAIL */
12556 /* If one of the two operands is an interesting constant, load a
12557 constant with the above and mask it in with a logical operation. */
12559 if (CONST_INT_P (operands[2]))
12562 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12563 operands[3] = constm1_rtx, op = and_optab;
12564 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12565 operands[3] = const0_rtx, op = ior_optab;
12567 return 0; /* FAIL */
12569 else if (CONST_INT_P (operands[3]))
12572 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12573 operands[2] = constm1_rtx, op = and_optab;
12574 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12575 operands[2] = const0_rtx, op = ior_optab;
12577 return 0; /* FAIL */
12580 return 0; /* FAIL */
12582 orig_out = operands[0];
12583 tmp = gen_reg_rtx (mode);
12586 /* Recurse to get the constant loaded. */
12587 if (ix86_expand_int_movcc (operands) == 0)
12588 return 0; /* FAIL */
12590 /* Mask in the interesting variable. */
12591 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12593 if (!rtx_equal_p (out, orig_out))
12594 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12596 return 1; /* DONE */
12600 * For comparison with above,
/* Real cmov path: force both arms into registers/nonimmediates and
   emit up to three IF_THEN_ELSE sets (main, bypass, second).  */
12610 if (! nonimmediate_operand (operands[2], mode))
12611 operands[2] = force_reg (mode, operands[2]);
12612 if (! nonimmediate_operand (operands[3], mode))
12613 operands[3] = force_reg (mode, operands[3]);
/* Copy an arm that overlaps the destination, since the extra tests
   read the destination after the first cmov writes it.  */
12615 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12617 rtx tmp = gen_reg_rtx (mode);
12618 emit_move_insn (tmp, operands[3]);
12621 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12623 rtx tmp = gen_reg_rtx (mode);
12624 emit_move_insn (tmp, operands[2]);
12628 if (! register_operand (operands[2], VOIDmode)
12630 || ! register_operand (operands[3], VOIDmode)))
12631 operands[2] = force_reg (mode, operands[2]);
12634 && ! register_operand (operands[3], VOIDmode))
12635 operands[3] = force_reg (mode, operands[3]);
12637 emit_insn (compare_seq);
12638 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12639 gen_rtx_IF_THEN_ELSE (mode,
12640 compare_op, operands[2],
12643 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12644 gen_rtx_IF_THEN_ELSE (mode,
12646 copy_rtx (operands[3]),
12647 copy_rtx (operands[0]))));
12649 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12650 gen_rtx_IF_THEN_ELSE (mode,
12652 copy_rtx (operands[2]),
12653 copy_rtx (operands[0]))));
12655 return 1; /* DONE */
12658 /* Swap, force into registers, or otherwise massage the two operands
12659 to an sse comparison with a mask result. Thus we differ a bit from
12660 ix86_prepare_fp_compare_args which expects to produce a flags result.
12662 The DEST operand exists to help determine whether to commute commutative
12663 operators. The POP0/POP1 operands are updated in place. The new
12664 comparison code is returned, or UNKNOWN if not implementable. */
12666 static enum rtx_code
12667 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12668 rtx *pop0, rtx *pop1)
/* NOTE(review): this extract elides interior lines (braces, blank lines and
   switch labels), so annotation is limited to the visible statements.  */
12676 /* We have no LTGT as an operator. We could implement it with
12677 NE & ORDERED, but this requires an extra temporary. It's
12678 not clear that it's worth it. */
12685 /* These are supported directly. */
12692 /* For commutative operators, try to canonicalize the destination
12693 operand to be first in the comparison - this helps reload to
12694 avoid extra moves. */
12695 if (!dest || !rtx_equal_p (dest, *pop1))
12703 /* These are not supported directly. Swap the comparison operands
12704 to transform into something that is supported. */
12708 code = swap_condition (code);
/* Presumably unreachable for any comparison code not handled by the elided
   switch labels above -- TODO confirm against the full source.  */
12712 gcc_unreachable ();
12718 /* Detect conditional moves that exactly match min/max operational
12719 semantics. Note that this is IEEE safe, as long as we don't
12720 interchange the operands.
12722 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12723 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): the return-type line and several interior lines are elided
   from this extract; comments below cover only what is visible.  */
12726 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12727 rtx cmp_op1, rtx if_true, rtx if_false)
12729 enum machine_mode mode;
12735 else if (code == UNGE)
12738 if_true = if_false;
/* Match only when the comparison operands are exactly the selected values,
   in either order (the order distinguishes min from max).  */
12744 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12746 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12751 mode = GET_MODE (dest);
12753 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12754 but MODE may be a vector mode and thus not appropriate. */
12755 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-safe path: wrap the operands in an UNSPEC so the operand order
   (and hence NaN/signed-zero behavior) is preserved.  */
12757 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12760 if_true = force_reg (mode, if_true);
12761 v = gen_rtvec (2, if_true, if_false);
12762 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX rtx is sufficient.  */
12766 code = is_min ? SMIN : SMAX;
12767 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12770 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12774 /* Expand an sse vector comparison. Return the register with the result. */
/* NOTE(review): interior lines (braces, declarations, the return statement)
   are elided from this extract.  */
12777 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12778 rtx op_true, rtx op_false)
12780 enum machine_mode mode = GET_MODE (dest);
12783 cmp_op0 = force_reg (mode, cmp_op0);
12784 if (!nonimmediate_operand (cmp_op1, mode))
12785 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST overlaps either selected value, so the
   comparison result does not clobber an input still needed afterwards.  */
12788 || reg_overlap_mentioned_p (dest, op_true)
12789 || reg_overlap_mentioned_p (dest, op_false))
12790 dest = gen_reg_rtx (mode);
12792 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12793 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12798 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12799 operations. This is used for both scalar and vector conditional moves. */
/* NOTE(review): braces, blank lines and some declarations are elided from
   this extract; comments are limited to the visible statements.  */
12802 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12804 enum machine_mode mode = GET_MODE (dest);
/* Special case: FALSE arm is zero, so DEST = CMP & TRUE suffices.  */
12807 if (op_false == CONST0_RTX (mode))
12809 op_true = force_reg (mode, op_true);
12810 x = gen_rtx_AND (mode, cmp, op_true);
12811 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: TRUE arm is zero, so DEST = ~CMP & FALSE suffices.  */
12813 else if (op_true == CONST0_RTX (mode))
12815 op_false = force_reg (mode, op_false);
12816 x = gen_rtx_NOT (mode, cmp);
12817 x = gen_rtx_AND (mode, x, op_false);
12818 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* General case: DEST = (TRUE & CMP) | (FALSE & ~CMP), built in two
   temporaries and combined with IOR.  */
12822 op_true = force_reg (mode, op_true);
12823 op_false = force_reg (mode, op_false);
12825 t2 = gen_reg_rtx (mode);
12827 t3 = gen_reg_rtx (mode);
12831 x = gen_rtx_AND (mode, op_true, cmp);
12832 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12834 x = gen_rtx_NOT (mode, cmp);
12835 x = gen_rtx_AND (mode, x, op_false);
12836 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12838 x = gen_rtx_IOR (mode, t3, t2);
12839 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12843 /* Expand a floating-point conditional move. Return true if successful. */
/* NOTE(review): this extract elides interior lines (braces, blanks, some
   early returns); annotation covers only the visible statements.  */
12846 ix86_expand_fp_movcc (rtx operands[])
12848 enum machine_mode mode = GET_MODE (operands[0]);
12849 enum rtx_code code = GET_CODE (operands[1]);
12850 rtx tmp, compare_op, second_test, bypass_test;
/* SSE path: expand via a mask compare plus logical-select sequence.  */
12852 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12854 enum machine_mode cmode;
12856 /* Since we've no cmove for sse registers, don't force bad register
12857 allocation just to gain access to it. Deny movcc when the
12858 comparison mode doesn't match the move mode. */
12859 cmode = GET_MODE (ix86_compare_op0);
12860 if (cmode == VOIDmode)
12861 cmode = GET_MODE (ix86_compare_op1);
12865 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12867 &ix86_compare_op1);
12868 if (code == UNKNOWN)
/* Try a direct min/max first; otherwise build mask + movcc sequence.  */
12871 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12872 ix86_compare_op1, operands[2],
12876 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12877 ix86_compare_op1, operands[2], operands[3]);
12878 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12882 /* The floating point conditional move instructions don't directly
12883 support conditions resulting from a signed integer comparison. */
12885 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12887 /* The floating point conditional move instructions don't directly
12888 support signed integer comparisons. */
12890 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition into a QImode register with setcc, then
   redo the compare against zero so fcmov can consume it.  */
12892 gcc_assert (!second_test && !bypass_test);
12893 tmp = gen_reg_rtx (QImode);
12894 ix86_expand_setcc (code, tmp);
12896 ix86_compare_op0 = tmp;
12897 ix86_compare_op1 = const0_rtx;
12898 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* If the extra tests would read operands already clobbered through
   operands[0], copy them to fresh registers first.  */
12900 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12902 tmp = gen_reg_rtx (mode);
12903 emit_move_insn (tmp, operands[3]);
12906 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12908 tmp = gen_reg_rtx (mode);
12909 emit_move_insn (tmp, operands[2]);
12913 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12914 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12915 operands[2], operands[3])));
12917 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12918 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12919 operands[3], operands[0])));
12921 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12922 gen_rtx_IF_THEN_ELSE (mode, second_test,
12923 operands[2], operands[0])));
12928 /* Expand a floating-point vector conditional move; a vcond operation
12929 rather than a movcc operation. */
/* NOTE(review): braces and early-return lines are elided from this
   extract.  Structure mirrors the SSE path of ix86_expand_fp_movcc.  */
12932 ix86_expand_fp_vcond (rtx operands[])
12934 enum rtx_code code = GET_CODE (operands[3]);
12937 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12938 &operands[4], &operands[5]);
12939 if (code == UNKNOWN)
/* Prefer a direct vector min/max when the pattern matches.  */
12942 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12943 operands[5], operands[1], operands[2]))
/* Otherwise: mask compare followed by logical select.  */
12946 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12947 operands[1], operands[2]);
12948 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12952 /* Expand a signed/unsigned integral vector conditional move. */
/* NOTE(review): switch labels, braces and several statements are elided
   from this extract; comments cover only the visible lines.  */
12955 ix86_expand_int_vcond (rtx operands[])
12957 enum machine_mode mode = GET_MODE (operands[0]);
12958 enum rtx_code code = GET_CODE (operands[3]);
12959 bool negate = false;
12962 cop0 = operands[4];
12963 cop1 = operands[5];
12965 /* Canonicalize the comparison to EQ, GT, GTU. */
12976 code = reverse_condition (code);
12982 code = reverse_condition (code);
12988 code = swap_condition (code);
12989 x = cop0, cop0 = cop1, cop1 = x;
12993 gcc_unreachable ();
12996 /* Only SSE4.1/SSE4.2 supports V2DImode. */
12997 if (mode == V2DImode)
13002 /* SSE4.1 supports EQ. */
13003 if (!TARGET_SSE4_1)
13009 /* SSE4.2 supports GT/GTU. */
13010 if (!TARGET_SSE4_2)
13015 gcc_unreachable ();
13019 /* Unsigned parallel compare is not supported by the hardware. Play some
13020 tricks to turn this into a signed comparison against 0. */
13023 cop0 = force_reg (mode, cop0);
13032 /* Perform a parallel modulo subtraction. */
13033 t1 = gen_reg_rtx (mode);
13034 emit_insn ((mode == V4SImode
13036 : gen_subv2di3) (t1, cop0, cop1));
13038 /* Extract the original sign bit of op0. */
13039 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13041 t2 = gen_reg_rtx (mode);
13042 emit_insn ((mode == V4SImode
13044 : gen_andv2di3) (t2, cop0, mask));
13046 /* XOR it back into the result of the subtraction. This results
13047 in the sign bit set iff we saw unsigned underflow. */
13048 x = gen_reg_rtx (mode);
13049 emit_insn ((mode == V4SImode
13051 : gen_xorv2di3) (x, t1, t2));
13059 /* Perform a parallel unsigned saturating subtraction. */
13060 x = gen_reg_rtx (mode);
13061 emit_insn (gen_rtx_SET (VOIDmode, x,
13062 gen_rtx_US_MINUS (mode, cop0, cop1)));
13069 gcc_unreachable ();
13073 cop1 = CONST0_RTX (mode);
/* NEGATE swaps the two value operands, compensating for a reversed
   comparison chosen during canonicalization above.  */
13076 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13077 operands[1+negate], operands[2-negate]);
13079 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13080 operands[2-negate]);
13084 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13085 true if we should do zero extension, else sign extension. HIGH_P is
13086 true if we want the N/2 high elements, else the low elements. */
/* NOTE(review): the unsigned_p/high_p branch lines are elided from this
   extract; only the interleave generator choices are visible.  */
13089 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13091 enum machine_mode imode = GET_MODE (operands[1]);
13092 rtx (*unpack)(rtx, rtx, rtx);
13099 unpack = gen_vec_interleave_highv16qi;
13101 unpack = gen_vec_interleave_lowv16qi;
13105 unpack = gen_vec_interleave_highv8hi;
13107 unpack = gen_vec_interleave_lowv8hi;
13111 unpack = gen_vec_interleave_highv4si;
13113 unpack = gen_vec_interleave_lowv4si;
13116 gcc_unreachable ();
13119 dest = gen_lowpart (imode, operands[0]);
/* Zero extension interleaves with zero; sign extension interleaves with
   a mask of the sign bits, built via a GT-against-zero compare.  */
13122 se = force_reg (imode, CONST0_RTX (imode));
13124 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13125 operands[1], pc_rtx, pc_rtx);
13127 emit_insn (unpack (dest, operands[1], se));
13130 /* This function performs the same task as ix86_expand_sse_unpack,
13131 but with SSE4.1 instructions. */
/* NOTE(review): branch lines selecting between the paired generators are
   elided from this extract.  */
13134 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13136 enum machine_mode imode = GET_MODE (operands[1]);
13137 rtx (*unpack)(rtx, rtx);
13144 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13146 unpack = gen_sse4_1_extendv8qiv8hi2;
13150 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13152 unpack = gen_sse4_1_extendv4hiv4si2;
13156 unpack = gen_sse4_1_zero_extendv2siv2di2;
13158 unpack = gen_sse4_1_extendv2siv2di2;
13161 gcc_unreachable ();
13164 dest = operands[0];
13167 /* Shift higher 8 bytes to lower 8 bytes. */
13168 src = gen_reg_rtx (imode);
13169 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13170 gen_lowpart (TImode, operands[1]),
13176 emit_insn (unpack (dest, src));
13179 /* Expand conditional increment or decrement using adb/sbb instructions.
13180 The default case using setcc followed by the conditional move can be
13181 done by generic code. */
/* NOTE(review): case labels, braces and some early returns are elided
   from this extract.  Returns 1 on success ("DONE").  */
13183 ix86_expand_int_addcc (rtx operands[])
13185 enum rtx_code code = GET_CODE (operands[1]);
13187 rtx val = const0_rtx;
13188 bool fpcmp = false;
13189 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +/-1 increments can be expressed via adc/sbb with a zero value.  */
13191 if (operands[3] != const1_rtx
13192 && operands[3] != constm1_rtx)
13194 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13195 ix86_compare_op1, &compare_op))
13197 code = GET_CODE (compare_op);
13199 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13200 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13203 code = ix86_fp_compare_code_to_integer (code);
/* FP conditions must reverse with maybe-unordered semantics.  */
13210 PUT_CODE (compare_op,
13211 reverse_condition_maybe_unordered
13212 (GET_CODE (compare_op)));
13214 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13216 PUT_MODE (compare_op, mode);
13218 /* Construct either adc or sbb insn. */
13219 if ((code == LTU) == (operands[3] == constm1_rtx))
13221 switch (GET_MODE (operands[0]))
13224 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13227 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13230 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13233 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13236 gcc_unreachable ();
13241 switch (GET_MODE (operands[0]))
13244 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13247 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13250 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13253 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13256 gcc_unreachable ();
13259 return 1; /* DONE */
13263 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13264 works for floating pointer parameters and nonoffsetable memories.
13265 For pushes, it returns just stack offsets; the values will be saved
13266 in the right order. Maximally three parts are generated. */
/* NOTE(review): braces, blank lines and a number of conditional lines are
   elided from this extract; comments are limited to the visible code.
   Returns the number of parts generated (2 or 3, per the assertion).  */
13269 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13274 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13276 size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers cannot be addressed as SImode parts.  */
13278 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13279 gcc_assert (size >= 2 && size <= 3);
13281 /* Optimize constant pool reference to immediates. This is used by fp
13282 moves, that force all constants to memory to allow combining. */
13283 if (MEM_P (operand) && MEM_READONLY_P (operand))
13285 rtx tmp = maybe_get_pool_constant (operand);
13290 if (MEM_P (operand) && !offsettable_memref_p (operand))
13292 /* The only non-offsetable memories we handle are pushes. */
13293 int ok = push_operand (operand, VOIDmode);
13297 operand = copy_rtx (operand);
13298 PUT_MODE (operand, Pmode);
13299 parts[0] = parts[1] = parts[2] = operand;
13303 if (GET_CODE (operand) == CONST_VECTOR)
13305 enum machine_mode imode = int_mode_for_mode (mode);
13306 /* Caution: if we looked through a constant pool memory above,
13307 the operand may actually have a different mode now. That's
13308 ok, since we want to pun this all the way back to an integer. */
13309 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13310 gcc_assert (operand != NULL);
/* 32-bit target: split into SImode parts.  */
13316 if (mode == DImode)
13317 split_di (&operand, 1, &parts[0], &parts[1]);
13320 if (REG_P (operand))
13322 gcc_assert (reload_completed);
13323 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13324 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13326 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13328 else if (offsettable_memref_p (operand))
13330 operand = adjust_address (operand, SImode, 0);
13331 parts[0] = operand;
13332 parts[1] = adjust_address (operand, SImode, 4);
13334 parts[2] = adjust_address (operand, SImode, 8);
13336 else if (GET_CODE (operand) == CONST_DOUBLE)
13341 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13345 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13346 parts[2] = gen_int_mode (l[2], SImode);
13349 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13352 gcc_unreachable ();
13354 parts[1] = gen_int_mode (l[1], SImode);
13355 parts[0] = gen_int_mode (l[0], SImode);
13358 gcc_unreachable ();
/* 64-bit target: split into DImode (plus SImode/DImode upper part).  */
13363 if (mode == TImode)
13364 split_ti (&operand, 1, &parts[0], &parts[1]);
13365 if (mode == XFmode || mode == TFmode)
13367 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13368 if (REG_P (operand))
13370 gcc_assert (reload_completed);
13371 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13372 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13374 else if (offsettable_memref_p (operand))
13376 operand = adjust_address (operand, DImode, 0);
13377 parts[0] = operand;
13378 parts[1] = adjust_address (operand, upper_mode, 8);
13380 else if (GET_CODE (operand) == CONST_DOUBLE)
13385 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13386 real_to_target (l, &r, mode);
13388 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13389 if (HOST_BITS_PER_WIDE_INT >= 64)
13392 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13393 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13396 parts[0] = immed_double_const (l[0], l[1], DImode);
13398 if (upper_mode == SImode)
13399 parts[1] = gen_int_mode (l[2], SImode);
13400 else if (HOST_BITS_PER_WIDE_INT >= 64)
13403 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13404 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13407 parts[1] = immed_double_const (l[2], l[3], DImode);
13410 gcc_unreachable ();
13417 /* Emit insns to perform a move or push of DI, DF, and XF values.
13418 Return false when normal moves are needed; true when all required
13419 insns have been emitted. Operands 2-4 contain the input values
13420 int the correct order; operands 5-7 contain the output values. */
/* NOTE(review): this extract elides braces, blank lines, some conditions
   and the `push`/`nparts` declarations; comments below are limited to the
   visible statements.  */
13423 ix86_split_long_move (rtx operands[])
13428 int collisions = 0;
13429 enum machine_mode mode = GET_MODE (operands[0]);
13431 /* The DFmode expanders may ask us to move double.
13432 For 64bit target this is single move. By hiding the fact
13433 here we simplify i386.md splitters. */
13434 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13436 /* Optimize constant pool reference to immediates. This is used by
13437 fp moves, that force all constants to memory to allow combining. */
13439 if (MEM_P (operands[1])
13440 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13441 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13442 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13443 if (push_operand (operands[0], VOIDmode))
13445 operands[0] = copy_rtx (operands[0]);
13446 PUT_MODE (operands[0], Pmode);
13449 operands[0] = gen_lowpart (DImode, operands[0]);
13450 operands[1] = gen_lowpart (DImode, operands[1]);
13451 emit_move_insn (operands[0], operands[1]);
13455 /* The only non-offsettable memory we handle is push. */
13456 if (push_operand (operands[0], VOIDmode))
13459 gcc_assert (!MEM_P (operands[0])
13460 || offsettable_memref_p (operands[0]));
13462 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13463 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13465 /* When emitting push, take care for source operands on the stack. */
13466 if (push && MEM_P (operands[1])
13467 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13470 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13471 XEXP (part[1][2], 0));
13472 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13473 XEXP (part[1][1], 0));
13476 /* We need to do copy in the right order in case an address register
13477 of the source overlaps the destination. */
13478 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13480 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13482 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13485 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13488 /* Collision in the middle part can be handled by reordering. */
13489 if (collisions == 1 && nparts == 3
13490 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13493 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13494 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13497 /* If there are more collisions, we can't handle it by reordering.
13498 Do an lea to the last part and use only one colliding move. */
13499 else if (collisions > 1)
13505 base = part[0][nparts - 1];
13507 /* Handle the case when the last part isn't valid for lea.
13508 Happens in 64-bit mode storing the 12-byte XFmode. */
13509 if (GET_MODE (base) != Pmode)
13510 base = gen_rtx_REG (Pmode, REGNO (base));
13512 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13513 part[1][0] = replace_equiv_address (part[1][0], base);
13514 part[1][1] = replace_equiv_address (part[1][1],
13515 plus_constant (base, UNITS_PER_WORD));
13517 part[1][2] = replace_equiv_address (part[1][2],
13518 plus_constant (base, 8));
/* Push expansion, 32-bit: XFmode occupies 12 bytes so pre-adjust
   the stack pointer by -4 before pushing the third part.  */
13528 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13529 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13530 emit_move_insn (part[0][2], part[1][2]);
13535 /* In 64bit mode we don't have 32bit push available. In case this is
13536 register, it is OK - we will just use larger counterpart. We also
13537 retype memory - these comes from attempt to avoid REX prefix on
13538 moving of second half of TFmode value. */
13539 if (GET_MODE (part[1][1]) == SImode)
13541 switch (GET_CODE (part[1][1]))
13544 part[1][1] = adjust_address (part[1][1], DImode, 0);
13548 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13552 gcc_unreachable ();
13555 if (GET_MODE (part[1][0]) == SImode)
13556 part[1][0] = part[1][1];
13559 emit_move_insn (part[0][1], part[1][1]);
13560 emit_move_insn (part[0][0], part[1][0]);
13564 /* Choose correct order to not overwrite the source before it is copied. */
13565 if ((REG_P (part[0][0])
13566 && REG_P (part[1][1])
13567 && (REGNO (part[0][0]) == REGNO (part[1][1])
13569 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13571 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy high-to-low: operands 2-4 get destinations, 5-7 sources.  */
13575 operands[2] = part[0][2];
13576 operands[3] = part[0][1];
13577 operands[4] = part[0][0];
13578 operands[5] = part[1][2];
13579 operands[6] = part[1][1];
13580 operands[7] = part[1][0];
13584 operands[2] = part[0][1];
13585 operands[3] = part[0][0];
13586 operands[5] = part[1][1];
13587 operands[6] = part[1][0];
/* Copy low-to-high (no collision detected).  */
13594 operands[2] = part[0][0];
13595 operands[3] = part[0][1];
13596 operands[4] = part[0][2];
13597 operands[5] = part[1][0];
13598 operands[6] = part[1][1];
13599 operands[7] = part[1][2];
13603 operands[2] = part[0][0];
13604 operands[3] = part[0][1];
13605 operands[5] = part[1][0];
13606 operands[6] = part[1][1];
13610 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13613 if (CONST_INT_P (operands[5])
13614 && operands[5] != const0_rtx
13615 && REG_P (operands[2]))
13617 if (CONST_INT_P (operands[6])
13618 && INTVAL (operands[6]) == INTVAL (operands[5]))
13619 operands[6] = operands[2];
13622 && CONST_INT_P (operands[7])
13623 && INTVAL (operands[7]) == INTVAL (operands[5]))
13624 operands[7] = operands[2];
13628 && CONST_INT_P (operands[6])
13629 && operands[6] != const0_rtx
13630 && REG_P (operands[3])
13631 && CONST_INT_P (operands[7])
13632 && INTVAL (operands[7]) == INTVAL (operands[6]))
13633 operands[7] = operands[3];
13636 emit_move_insn (operands[2], operands[5]);
13637 emit_move_insn (operands[3], operands[6]);
13639 emit_move_insn (operands[4], operands[7]);
13644 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13645 left shift by a constant, either using a single shift or
13646 a sequence of add instructions. */
/* NOTE(review): the `? gen_addsi3` / `? gen_ashlsi3` halves of the mode
   selectors are elided from this extract.  */
13649 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13653 emit_insn ((mode == DImode
13655 : gen_adddi3) (operand, operand, operand));
/* Use a sequence of adds (doublings) when cheaper than one shift.  */
13657 else if (!optimize_size
13658 && count * ix86_cost->add <= ix86_cost->shift_const)
13661 for (i=0; i<count; i++)
13663 emit_insn ((mode == DImode
13665 : gen_adddi3) (operand, operand, operand));
13669 emit_insn ((mode == DImode
13671 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* NOTE(review): braces, blank lines and some selector halves are elided
   from this extract; comments are limited to the visible statements.
   Splits a DImode/TImode left shift into word-sized operations.  */
13675 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13677 rtx low[2], high[2];
13679 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: emit the exact move/shld/shift sequence.  */
13681 if (CONST_INT_P (operands[2]))
13683 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13684 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13686 if (count >= single_width)
13688 emit_move_insn (high[0], low[1]);
13689 emit_move_insn (low[0], const0_rtx);
13691 if (count > single_width)
13692 ix86_expand_ashl_const (high[0], count - single_width, mode);
13696 if (!rtx_equal_p (operands[0], operands[1]))
13697 emit_move_insn (operands[0], operands[1]);
13698 emit_insn ((mode == DImode
13700 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13701 ix86_expand_ashl_const (low[0], count, mode);
13706 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* Special case 1 << N.  */
13708 if (operands[1] == const1_rtx)
13710 /* Assuming we've chosen a QImode capable registers, then 1 << N
13711 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13712 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13714 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13716 ix86_expand_clear (low[0]);
13717 ix86_expand_clear (high[0]);
13718 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13720 d = gen_lowpart (QImode, low[0]);
13721 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13722 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13723 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13725 d = gen_lowpart (QImode, high[0]);
13726 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13727 s = gen_rtx_NE (QImode, flags, const0_rtx);
13728 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13731 /* Otherwise, we can get the same results by manually performing
13732 a bit extract operation on bit 5/6, and then performing the two
13733 shifts. The two methods of getting 0/1 into low/high are exactly
13734 the same size. Avoiding the shift in the bit extract case helps
13735 pentium4 a bit; no one else seems to care much either way. */
13740 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13741 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13743 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13744 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13746 emit_insn ((mode == DImode
13748 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13749 emit_insn ((mode == DImode
13751 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13752 emit_move_insn (low[0], high[0]);
13753 emit_insn ((mode == DImode
13755 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13758 emit_insn ((mode == DImode
13760 : gen_ashldi3) (low[0], low[0], operands[2]));
13761 emit_insn ((mode == DImode
13763 : gen_ashldi3) (high[0], high[0], operands[2]));
/* Special case -1 << N.  */
13767 if (operands[1] == constm1_rtx)
13769 /* For -1 << N, we can avoid the shld instruction, because we
13770 know that we're shifting 0...31/63 ones into a -1. */
13771 emit_move_insn (low[0], constm1_rtx);
13773 emit_move_insn (high[0], low[0]);
13775 emit_move_insn (high[0], constm1_rtx);
/* General variable shift: shld plus shift, then fix up for counts
   >= single_width via cmove or a conditional-jump helper pattern.  */
13779 if (!rtx_equal_p (operands[0], operands[1]))
13780 emit_move_insn (operands[0], operands[1]);
13782 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13783 emit_insn ((mode == DImode
13785 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13788 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13790 if (TARGET_CMOVE && scratch)
13792 ix86_expand_clear (scratch);
13793 emit_insn ((mode == DImode
13794 ? gen_x86_shift_adj_1
13795 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13798 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* NOTE(review): braces and selector halves are elided from this extract.
   Splits a DImode/TImode arithmetic right shift into word operations.  */
13802 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13804 rtx low[2], high[2];
13806 const int single_width = mode == DImode ? 32 : 64;
13808 if (CONST_INT_P (operands[2]))
13810 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13811 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximal count: both words become copies of the sign bit.  */
13813 if (count == single_width * 2 - 1)
13815 emit_move_insn (high[0], high[1]);
13816 emit_insn ((mode == DImode
13818 : gen_ashrdi3) (high[0], high[0],
13819 GEN_INT (single_width - 1)));
13820 emit_move_insn (low[0], high[0]);
/* Count >= word width: low gets the shifted high word, high gets
   the sign extension.  */
13823 else if (count >= single_width)
13825 emit_move_insn (low[0], high[1]);
13826 emit_move_insn (high[0], low[0]);
13827 emit_insn ((mode == DImode
13829 : gen_ashrdi3) (high[0], high[0],
13830 GEN_INT (single_width - 1)));
13831 if (count > single_width)
13832 emit_insn ((mode == DImode
13834 : gen_ashrdi3) (low[0], low[0],
13835 GEN_INT (count - single_width)));
13839 if (!rtx_equal_p (operands[0], operands[1]))
13840 emit_move_insn (operands[0], operands[1]);
13841 emit_insn ((mode == DImode
13843 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13844 emit_insn ((mode == DImode
13846 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then adjust when count >= word width.  */
13851 if (!rtx_equal_p (operands[0], operands[1]))
13852 emit_move_insn (operands[0], operands[1]);
13854 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13856 emit_insn ((mode == DImode
13858 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13859 emit_insn ((mode == DImode
13861 : gen_ashrdi3) (high[0], high[0], operands[2]));
13863 if (TARGET_CMOVE && scratch)
/* scratch holds the sign-extension word for the cmove fixup.  */
13865 emit_move_insn (scratch, high[0]);
13866 emit_insn ((mode == DImode
13868 : gen_ashrdi3) (scratch, scratch,
13869 GEN_INT (single_width - 1)));
13870 emit_insn ((mode == DImode
13871 ? gen_x86_shift_adj_1
13872 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13876 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* NOTE(review): braces and selector halves are elided from this extract.
   Splits a DImode/TImode logical right shift into word operations.  */
13881 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13883 rtx low[2], high[2];
13885 const int single_width = mode == DImode ? 32 : 64;
13887 if (CONST_INT_P (operands[2]))
13889 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13890 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word width: low gets the high word, high is zeroed.  */
13892 if (count >= single_width)
13894 emit_move_insn (low[0], high[1]);
13895 ix86_expand_clear (high[0]);
13897 if (count > single_width)
13898 emit_insn ((mode == DImode
13900 : gen_lshrdi3) (low[0], low[0],
13901 GEN_INT (count - single_width)));
13905 if (!rtx_equal_p (operands[0], operands[1]))
13906 emit_move_insn (operands[0], operands[1]);
13907 emit_insn ((mode == DImode
13909 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13910 emit_insn ((mode == DImode
13912 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then adjust for counts >= word width.  */
13917 if (!rtx_equal_p (operands[0], operands[1]))
13918 emit_move_insn (operands[0], operands[1]);
13920 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13922 emit_insn ((mode == DImode
13924 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13925 emit_insn ((mode == DImode
13927 : gen_lshrdi3) (high[0], high[0], operands[2]));
13929 /* Heh. By reversing the arguments, we can reuse this pattern. */
13930 if (TARGET_CMOVE && scratch)
13932 ix86_expand_clear (scratch);
13933 emit_insn ((mode == DImode
13934 ? gen_x86_shift_adj_1
13935 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13939 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13943 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* NOTE(review): the return type and the tail of the REG_NOTES assignment
   are elided from this extract.  Attaches a REG_BR_PROB note with
   probability PROB to the most recently emitted jump.  */
13945 predict_jump (int prob)
13947 rtx insn = get_last_insn ();
13948 gcc_assert (JUMP_P (insn));
13950 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13955 /* Helper function for the string operations below. Dest VARIABLE whether
13956 it is aligned to VALUE bytes. If true, jump to the label. */
/* NOTE(review): return type, braces and the return of LABEL are elided
   from this extract.  Emits VARIABLE & VALUE and jumps to a new label
   when the result is zero (i.e. when aligned).  */
13958 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13960 rtx label = gen_label_rtx ();
13961 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13962 if (GET_MODE (variable) == DImode)
13963 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13965 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13966 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Epilogue tests are 50/50; prologue alignment tests usually pass.  */
13969 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13971 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13975 /* Adjust COUNTER by the VALUE. */
/* NOTE(review): return type and braces are elided from this extract.
   Subtracts VALUE from COUNTREG in the register's own mode.  */
13977 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13979 if (GET_MODE (countreg) == DImode)
13980 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13982 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13985 /* Zero extend possibly SImode EXP to Pmode register. */
/* NOTE(review): return type, braces and the final `return r;` are elided
   from this extract.  */
13987 ix86_zero_extend_to_Pmode (rtx exp)
/* VOIDmode (constants) are simply forced into a Pmode register.  */
13990 if (GET_MODE (exp) == VOIDmode)
13991 return force_reg (Pmode, exp);
13992 if (GET_MODE (exp) == Pmode)
13993 return copy_to_mode_reg (Pmode, exp);
/* Otherwise EXP is SImode on a 64-bit target: zero-extend to DImode.  */
13994 r = gen_reg_rtx (Pmode);
13995 emit_insn (gen_zero_extendsidi2 (r, exp));
13999 /* Divide COUNTREG by SCALE.  For a constant count this folds at compile
time; for a register count SCALE must be a power of two (see exact_log2
below) and the division is emitted as a logical right shift. */
14001 scale_counter (rtx countreg, int scale)
14004 rtx piece_size_mask;
14008 if (CONST_INT_P (countreg))
14009 return GEN_INT (INTVAL (countreg) / scale);
14010 gcc_assert (REG_P (countreg));
14012 piece_size_mask = GEN_INT (scale - 1);
14013 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14014 GEN_INT (exact_log2 (scale)),
14015 NULL, 1, OPTAB_DIRECT);
14019 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14020 DImode for constant loop counts. */
14022 static enum machine_mode
14023 counter_mode (rtx count_exp)
/* A non-VOIDmode rtx already carries its own mode.  */
14025 if (GET_MODE (count_exp) != VOIDmode)
14026 return GET_MODE (count_exp);
14027 if (GET_CODE (count_exp) != CONST_INT)
/* Constants that do not fit in 32 bits need DImode on 64-bit targets.  */
14029 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14034 /* When SRCPTR is non-NULL, output simple loop to move memory
14035 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14036 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14037 equivalent loop to set memory by VALUE (supposed to be in MODE).
14039 The size is rounded down to whole number of chunk size moved at once.
14040 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14044 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14045 rtx destptr, rtx srcptr, rtx value,
14046 rtx count, enum machine_mode mode, int unroll,
14049 rtx out_label, top_label, iter, tmp;
14050 enum machine_mode iter_mode = counter_mode (count);
/* One loop iteration processes GET_MODE_SIZE (mode) * unroll bytes.  */
14051 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14052 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14058 top_label = gen_label_rtx ();
14059 out_label = gen_label_rtx ();
14060 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the piece size.  */
14062 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14063 NULL, 1, OPTAB_DIRECT);
14064 /* Those two should combine. */
14065 if (piece_size == const1_rtx)
/* Skip the loop entirely when there is nothing to process.  */
14067 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14069 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14071 emit_move_insn (iter, const0_rtx);
14073 emit_label (top_label);
/* Address each chunk as ptr + iter; ITER may need widening to Pmode.  */
14075 tmp = convert_modes (Pmode, iter_mode, iter, true);
14076 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14077 destmem = change_address (destmem, mode, x_addr);
14081 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14082 srcmem = change_address (srcmem, mode, y_addr);
14084 /* When unrolling for chips that reorder memory reads and writes,
14085 we can save registers by using single temporary.
14086 Also using 4 temporaries is overkill in 32bit mode. */
14087 if (!TARGET_64BIT && 0)
14089 for (i = 0; i < unroll; i++)
14094 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14096 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14098 emit_move_insn (destmem, srcmem);
/* Load all chunks into temporaries first, then store, so the moves can be
scheduled independently.  */
14104 gcc_assert (unroll <= 4);
14105 for (i = 0; i < unroll; i++)
14107 tmpreg[i] = gen_reg_rtx (mode);
14111 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14113 emit_move_insn (tmpreg[i], srcmem);
14115 for (i = 0; i < unroll; i++)
14120 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14122 emit_move_insn (destmem, tmpreg[i]);
/* Memset variant: store VALUE into each chunk.  */
14127 for (i = 0; i < unroll; i++)
14131 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14132 emit_move_insn (destmem, value);
/* iter += piece_size; loop back while iter < size.  */
14135 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14136 true, OPTAB_LIB_WIDEN);
14138 emit_move_insn (iter, tmp);
14140 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-branch probability from the expected trip count.  */
14142 if (expected_size != -1)
14144 expected_size /= GET_MODE_SIZE (mode) * unroll;
14145 if (expected_size == 0)
14147 else if (expected_size > REG_BR_PROB_BASE)
14148 predict_jump (REG_BR_PROB_BASE - 1);
14150 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14153 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* After the loop, advance the pointers past the processed region so the
epilogue code sees updated DESTPTR/SRCPTR.  */
14154 iter = ix86_zero_extend_to_Pmode (iter);
14155 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14156 true, OPTAB_LIB_WIDEN);
14157 if (tmp != destptr)
14158 emit_move_insn (destptr, tmp);
14161 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14162 true, OPTAB_LIB_WIDEN);
14164 emit_move_insn (srcptr, tmp);
14166 emit_label (out_label);
14169 /* Output "rep; mov" instruction.
14170 Arguments have same meaning as for previous function.  */
14172 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14173 rtx destptr, rtx srcptr,
14175 enum machine_mode mode)
14181 /* If the size is known, it is shorter to use rep movs. */
14182 if (mode == QImode && CONST_INT_P (count)
14183 && !(INTVAL (count) & 3))
/* The rep pattern wants BLKmode MEMs addressed exactly by the pointer regs.  */
14186 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14187 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14188 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14189 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* Count is in MODE-sized units, zero-extended to Pmode.  */
14190 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14191 if (mode != QImode)
/* Final pointer values: ptr + (countreg << log2 (piece size)).  */
14193 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14194 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14195 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14196 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14197 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14198 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14202 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14203 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14205 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14209 /* Output "rep; stos" instruction.
14210 Arguments have same meaning as for previous function.  */
14212 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14214 enum machine_mode mode)
/* The rep pattern wants a BLKmode MEM addressed exactly by DESTPTR.  */
14219 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14220 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* VALUE must live in a register of the store width.  */
14221 value = force_reg (mode, gen_lowpart (mode, value));
14222 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14223 if (mode != QImode)
/* Final pointer value: destptr + (countreg << log2 (piece size)).  */
14225 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14226 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14227 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14230 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14231 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-wide move from SRCPTR + OFFSET to DESTPTR + OFFSET
using the strmov pattern, which also advances both pointer registers.  */
14235 emit_strmov (rtx destmem, rtx srcmem,
14236 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14238 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14239 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14240 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14243 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14245 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14246 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit straight-line moves, widest pieces first, one move
per set bit of the residual count.  */
14249 if (CONST_INT_P (count))
14251 HOST_WIDE_INT countval = INTVAL (count);
14254 if ((countval & 0x10) && max_size > 16)
14258 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14259 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14262 gcc_unreachable ();
14265 if ((countval & 0x08) && max_size > 8)
14268 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit target: an 8-byte piece is two SImode moves.  */
14271 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14272 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14276 if ((countval & 0x04) && max_size > 4)
14278 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14281 if ((countval & 0x02) && max_size > 2)
14283 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14286 if ((countval & 0x01) && max_size > 1)
14288 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residue: fall back to a byte copy loop over
count & (max_size - 1) bytes.  */
14295 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14296 count, 1, OPTAB_DIRECT);
14297 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14298 count, QImode, 1, 4);
14302 /* When there are stringops, we can cheaply increase dest and src pointers.
14303 Otherwise we save code size by maintaining offset (zero is readily
14304 available from preceding rep operation) and using x86 addressing modes.
14306 if (TARGET_SINGLE_STRINGOP)
/* One conditional strmov per power-of-two residue size; strmov advances
the pointers itself.  */
14310 rtx label = ix86_expand_aligntest (count, 4, true);
14311 src = change_address (srcmem, SImode, srcptr);
14312 dest = change_address (destmem, SImode, destptr);
14313 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14314 emit_label (label);
14315 LABEL_NUSES (label) = 1;
14319 rtx label = ix86_expand_aligntest (count, 2, true);
14320 src = change_address (srcmem, HImode, srcptr);
14321 dest = change_address (destmem, HImode, destptr);
14322 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14323 emit_label (label);
14324 LABEL_NUSES (label) = 1;
14328 rtx label = ix86_expand_aligntest (count, 1, true);
14329 src = change_address (srcmem, QImode, srcptr);
14330 dest = change_address (destmem, QImode, destptr);
14331 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14332 emit_label (label);
14333 LABEL_NUSES (label) = 1;
/* No single-insn stringops: keep a running OFFSET register and use plain
moves with ptr + offset addressing.  */
14338 rtx offset = force_reg (Pmode, const0_rtx);
14343 rtx label = ix86_expand_aligntest (count, 4, true);
14344 src = change_address (srcmem, SImode, srcptr);
14345 dest = change_address (destmem, SImode, destptr);
14346 emit_move_insn (dest, src);
14347 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14348 true, OPTAB_LIB_WIDEN);
14350 emit_move_insn (offset, tmp);
14351 emit_label (label);
14352 LABEL_NUSES (label) = 1;
14356 rtx label = ix86_expand_aligntest (count, 2, true);
14357 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14358 src = change_address (srcmem, HImode, tmp);
14359 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14360 dest = change_address (destmem, HImode, tmp);
14361 emit_move_insn (dest, src);
14362 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14363 true, OPTAB_LIB_WIDEN);
14365 emit_move_insn (offset, tmp);
14366 emit_label (label);
14367 LABEL_NUSES (label) = 1;
14371 rtx label = ix86_expand_aligntest (count, 1, true);
14372 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14373 src = change_address (srcmem, QImode, tmp);
14374 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14375 dest = change_address (destmem, QImode, tmp);
14376 emit_move_insn (dest, src);
14377 emit_label (label);
14378 LABEL_NUSES (label) = 1;
14383 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.
Masks COUNT down to the residual byte count and delegates to the generic
byte-store loop (SRCPTR/SRCMEM passed as NULL selects the memset form).  */
14385 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14386 rtx count, int max_size)
14389 expand_simple_binop (counter_mode (count), AND, count,
14390 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14391 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14392 gen_lowpart (QImode, value), count, QImode,
14396 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14398 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant count: straight-line strset stores, widest pieces first, one
store per set bit of the residual count.  VALUE is expected to be already
promoted (see promote_duplicated_reg_to_size).  */
14402 if (CONST_INT_P (count))
14404 HOST_WIDE_INT countval = INTVAL (count);
14407 if ((countval & 0x10) && max_size > 16)
14411 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14412 emit_insn (gen_strset (destptr, dest, value));
14413 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14414 emit_insn (gen_strset (destptr, dest, value));
14417 gcc_unreachable ();
14420 if ((countval & 0x08) && max_size > 8)
14424 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14425 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit target: an 8-byte piece is two SImode stores.  */
14429 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14430 emit_insn (gen_strset (destptr, dest, value));
14431 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14432 emit_insn (gen_strset (destptr, dest, value));
14436 if ((countval & 0x04) && max_size > 4)
14438 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14439 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14442 if ((countval & 0x02) && max_size > 2)
14444 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14445 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14448 if ((countval & 0x01) && max_size > 1)
14450 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14451 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Variable residue without a usable promoted value: byte loop fallback.  */
14458 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residue: one alignment-tested store group per power-of-two size;
strset advances DESTPTR itself.  */
14463 rtx label = ix86_expand_aligntest (count, 16, true);
14466 dest = change_address (destmem, DImode, destptr);
14467 emit_insn (gen_strset (destptr, dest, value));
14468 emit_insn (gen_strset (destptr, dest, value));
14472 dest = change_address (destmem, SImode, destptr);
14473 emit_insn (gen_strset (destptr, dest, value));
14474 emit_insn (gen_strset (destptr, dest, value));
14475 emit_insn (gen_strset (destptr, dest, value));
14476 emit_insn (gen_strset (destptr, dest, value));
14478 emit_label (label);
14479 LABEL_NUSES (label) = 1;
14483 rtx label = ix86_expand_aligntest (count, 8, true);
14486 dest = change_address (destmem, DImode, destptr);
14487 emit_insn (gen_strset (destptr, dest, value));
14491 dest = change_address (destmem, SImode, destptr);
14492 emit_insn (gen_strset (destptr, dest, value));
14493 emit_insn (gen_strset (destptr, dest, value));
14495 emit_label (label);
14496 LABEL_NUSES (label) = 1;
14500 rtx label = ix86_expand_aligntest (count, 4, true);
14501 dest = change_address (destmem, SImode, destptr);
14502 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14503 emit_label (label);
14504 LABEL_NUSES (label) = 1;
14508 rtx label = ix86_expand_aligntest (count, 2, true);
14509 dest = change_address (destmem, HImode, destptr);
14510 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14511 emit_label (label);
14512 LABEL_NUSES (label) = 1;
14516 rtx label = ix86_expand_aligntest (count, 1, true);
14517 dest = change_address (destmem, QImode, destptr);
14518 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14519 emit_label (label);
14520 LABEL_NUSES (label) = 1;
14524 /* Copy enough from DEST to SRC to align DEST known to be aligned by ALIGN to
14525 DESIRED_ALIGNMENT.  One conditional move per alignment bit.  */
14527 expand_movmem_prologue (rtx destmem, rtx srcmem,
14528 rtx destptr, rtx srcptr, rtx count,
14529 int align, int desired_alignment)
14531 if (align <= 1 && desired_alignment > 1)
/* If DESTPTR is odd, copy one byte and decrement the remaining count.  */
14533 rtx label = ix86_expand_aligntest (destptr, 1, false);
14534 srcmem = change_address (srcmem, QImode, srcptr);
14535 destmem = change_address (destmem, QImode, destptr);
14536 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14537 ix86_adjust_counter (count, 1);
14538 emit_label (label);
14539 LABEL_NUSES (label) = 1;
14541 if (align <= 2 && desired_alignment > 2)
14543 rtx label = ix86_expand_aligntest (destptr, 2, false);
14544 srcmem = change_address (srcmem, HImode, srcptr);
14545 destmem = change_address (destmem, HImode, destptr);
14546 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14547 ix86_adjust_counter (count, 2);
14548 emit_label (label);
14549 LABEL_NUSES (label) = 1;
14551 if (align <= 4 && desired_alignment > 4)
14553 rtx label = ix86_expand_aligntest (destptr, 4, false);
14554 srcmem = change_address (srcmem, SImode, srcptr);
14555 destmem = change_address (destmem, SImode, destptr);
14556 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14557 ix86_adjust_counter (count, 4);
14558 emit_label (label);
14559 LABEL_NUSES (label) = 1;
/* Alignments beyond 8 would need more test/copy steps than emitted above.  */
14561 gcc_assert (desired_alignment <= 8);
14564 /* Set enough from DEST to align DEST known to be aligned by ALIGN to
14565 DESIRED_ALIGNMENT.  One conditional store per alignment bit.  */
14567 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14568 int align, int desired_alignment)
14570 if (align <= 1 && desired_alignment > 1)
/* If DESTPTR is odd, store one byte and decrement the remaining count.  */
14572 rtx label = ix86_expand_aligntest (destptr, 1, false);
14573 destmem = change_address (destmem, QImode, destptr);
14574 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14575 ix86_adjust_counter (count, 1);
14576 emit_label (label);
14577 LABEL_NUSES (label) = 1;
14579 if (align <= 2 && desired_alignment > 2)
14581 rtx label = ix86_expand_aligntest (destptr, 2, false);
14582 destmem = change_address (destmem, HImode, destptr);
14583 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14584 ix86_adjust_counter (count, 2);
14585 emit_label (label);
14586 LABEL_NUSES (label) = 1;
14588 if (align <= 4 && desired_alignment > 4)
14590 rtx label = ix86_expand_aligntest (destptr, 4, false);
14591 destmem = change_address (destmem, SImode, destptr);
14592 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14593 ix86_adjust_counter (count, 4);
14594 emit_label (label);
14595 LABEL_NUSES (label) = 1;
/* Alignments beyond 8 would need more test/store steps than emitted above.  */
14597 gcc_assert (desired_alignment <= 8);
14600 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14601 static enum stringop_alg
14602 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14603 int *dynamic_check)
14605 const struct stringop_algs * algs;
14607 *dynamic_check = -1;
14609 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14611 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14612 if (stringop_alg != no_stringop)
14613 return stringop_alg;
14614 /* rep; movq or rep; movl is the smallest variant. */
14615 else if (optimize_size)
14617 if (!count || (count & 3))
14618 return rep_prefix_1_byte;
14620 return rep_prefix_4_byte;
14622 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14624 else if (expected_size != -1 && expected_size < 4)
14625 return loop_1_byte;
14626 else if (expected_size != -1)
14629 enum stringop_alg alg = libcall;
14630 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14632 gcc_assert (algs->size[i].max);
14633 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14635 if (algs->size[i].alg != libcall)
14636 alg = algs->size[i].alg;
14637 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14638 last non-libcall inline algorithm. */
14639 if (TARGET_INLINE_ALL_STRINGOPS)
14641 /* When the current size is best to be copied by a libcall,
14642 but we are still forced to inline, run the heuristic bellow
14643 that will pick code for medium sized blocks. */
14644 if (alg != libcall)
14649 return algs->size[i].alg;
14652 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14654 /* When asked to inline the call anyway, try to pick meaningful choice.
14655 We look for maximal size of block that is faster to copy by hand and
14656 take blocks of at most of that size guessing that average size will
14657 be roughly half of the block.
14659 If this turns out to be bad, we might simply specify the preferred
14660 choice in ix86_costs. */
14661 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14662 && algs->unknown_size == libcall)
14665 enum stringop_alg alg;
14668 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14669 if (algs->size[i].alg != libcall && algs->size[i].alg)
14670 max = algs->size[i].max;
14673 alg = decide_alg (count, max / 2, memset, dynamic_check);
14674 gcc_assert (*dynamic_check == -1);
14675 gcc_assert (alg != libcall);
14676 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14677 *dynamic_check = max;
14680 return algs->unknown_size;
14683 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14684 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14686 decide_alignment (int align,
14687 enum stringop_alg alg,
14690 int desired_align = 0;
14694 gcc_unreachable ();
/* Loops copy one word per iteration, so align to the word size.  */
14696 case unrolled_loop:
14697 desired_align = GET_MODE_SIZE (Pmode);
14699 case rep_prefix_8_byte:
14702 case rep_prefix_4_byte:
14703 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14704 copying whole cacheline at once. */
14705 if (TARGET_PENTIUMPRO)
14710 case rep_prefix_1_byte:
14711 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14712 copying whole cacheline at once. */
14713 if (TARGET_PENTIUMPRO)
/* Never request less than the known alignment.  */
14727 if (desired_align < align)
14728 desired_align = align;
/* Tiny blocks are not worth an alignment prologue.  */
14729 if (expected_size != -1 && expected_size < 4)
14730 desired_align = align;
14731 return desired_align;
14734 /* Return the smallest power of 2 greater than VAL.  Used to round the
epilogue size bound to a power of two (see the movmem/setmem expanders).  */
14736 smallest_pow2_greater_than (int val)
14744 /* Expand string move (memcpy) operation. Use i386 string operations when
14745 profitable. expand_clrmem contains similar code. The code depends upon
14746 architecture, block size and alignment, but always has the same
14749 1) Prologue guard: Conditional that jumps up to epilogues for small
14750 blocks that can be handled by epilogue alone. This is faster but
14751 also needed for correctness, since prologue assumes the block is larger
14752 than the desired alignment.
14754 Optional dynamic check for size and libcall for large
14755 blocks is emitted here too, with -minline-stringops-dynamically.
14757 2) Prologue: copy first few bytes in order to get destination aligned
14758 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14759 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14760 We emit either a jump tree on power of two sized blocks, or a byte loop.
14762 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14763 with specified algorithm.
14765 4) Epilogue: code copying tail of the block that is too small to be
14766 handled by main body (or up to size guarded by prologue guard). */
14769 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14770 rtx expected_align_exp, rtx expected_size_exp)
14776 rtx jump_around_label = NULL;
14777 HOST_WIDE_INT align = 1;
14778 unsigned HOST_WIDE_INT count = 0;
14779 HOST_WIDE_INT expected_size = -1;
14780 int size_needed = 0, epilogue_size_needed;
14781 int desired_align = 0;
14782 enum stringop_alg alg;
14785 if (CONST_INT_P (align_exp))
14786 align = INTVAL (align_exp);
14787 /* i386 can do misaligned access on reasonably increased cost. */
14788 if (CONST_INT_P (expected_align_exp)
14789 && INTVAL (expected_align_exp) > align)
14790 align = INTVAL (expected_align_exp);
/* A constant count doubles as the expected size.  */
14791 if (CONST_INT_P (count_exp))
14792 count = expected_size = INTVAL (count_exp)
14793 if (CONST_INT_P (expected_size_exp) && count == 0)
14794 expected_size = INTVAL (expected_size_exp);
14796 /* Step 0: Decide on preferred algorithm, desired alignment and
14797 size of chunks to be copied by main loop. */
14799 alg = decide_alg (count, expected_size, false, &dynamic_check);
14800 desired_align = decide_alignment (align, alg, expected_size);
14802 if (!TARGET_ALIGN_STRINGOPS)
14803 align = desired_align;
14805 if (alg == libcall)
14807 gcc_assert (alg != no_stringop);
/* Pointers must be in registers for the patterns used below.  */
14809 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14810 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14811 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED = bytes processed per main-body step for the chosen ALG.  */
14816 gcc_unreachable ();
14818 size_needed = GET_MODE_SIZE (Pmode);
14820 case unrolled_loop:
14821 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14823 case rep_prefix_8_byte:
14826 case rep_prefix_4_byte:
14829 case rep_prefix_1_byte:
14835 epilogue_size_needed = size_needed;
14837 /* Step 1: Prologue guard. */
14839 /* Alignment code needs count to be in register. */
14840 if (CONST_INT_P (count_exp) && desired_align > align)
14842 enum machine_mode mode = SImode;
14843 if (TARGET_64BIT && (count & ~0xffffffff))
14845 count_exp = force_reg (mode, count_exp);
14847 gcc_assert (desired_align >= 1 && align >= 1);
14849 /* Ensure that alignment prologue won't copy past end of block. */
14850 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14852 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14853 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14854 Make sure it is power of 2. */
14855 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* Branch straight to the epilogue when the whole block fits in it.  */
14857 label = gen_label_rtx ();
14858 emit_cmp_and_jump_insns (count_exp,
14859 GEN_INT (epilogue_size_needed),
14860 LTU, 0, counter_mode (count_exp), 1, label);
14861 if (GET_CODE (count_exp) == CONST_INT)
14863 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14864 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14866 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14868 /* Emit code to decide on runtime whether library call or inline should be
14870 if (dynamic_check != -1)
14872 rtx hot_label = gen_label_rtx ();
14873 jump_around_label = gen_label_rtx ();
/* Blocks larger than DYNAMIC_CHECK - 1 go through the library call.  */
14874 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14875 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14876 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14877 emit_block_move_via_libcall (dst, src, count_exp, false);
14878 emit_jump (jump_around_label);
14879 emit_label (hot_label);
14882 /* Step 2: Alignment prologue. */
14884 if (desired_align > align)
14886 /* Except for the first move in epilogue, we no longer know
14887 constant offset in aliasing info. It doesn't seem worth
14888 the pain to maintain it for the first move, so throw away
14890 src = change_address (src, BLKmode, srcreg);
14891 dst = change_address (dst, BLKmode, destreg);
14892 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
/* With a 1-byte main body the epilogue guard can land right here.  */
14895 if (label && size_needed == 1)
14897 emit_label (label);
14898 LABEL_NUSES (label) = 1;
14902 /* Step 3: Main loop. */
14908 gcc_unreachable ();
14910 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14911 count_exp, QImode, 1, expected_size);
14914 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14915 count_exp, Pmode, 1, expected_size);
14917 case unrolled_loop:
14918 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14919 registers for 4 temporaries anyway. */
14920 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14921 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14924 case rep_prefix_8_byte:
14925 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14928 case rep_prefix_4_byte:
14929 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14932 case rep_prefix_1_byte:
14933 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14937 /* Adjust properly the offset of src and dest memory for aliasing. */
14938 if (CONST_INT_P (count_exp))
14940 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14941 (count / size_needed) * size_needed);
14942 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14943 (count / size_needed) * size_needed);
14947 src = change_address (src, BLKmode, srcreg);
14948 dst = change_address (dst, BLKmode, destreg);
14951 /* Step 4: Epilogue to copy the remaining bytes. */
14955 /* When the main loop is done, COUNT_EXP might hold original count,
14956 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14957 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14958 bytes. Compensate if needed. */
14960 if (size_needed < epilogue_size_needed)
14963 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14964 GEN_INT (size_needed - 1), count_exp, 1,
14966 if (tmp != count_exp)
14967 emit_move_insn (count_exp, tmp);
14969 emit_label (label);
14970 LABEL_NUSES (label) = 1;
14973 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14974 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14975 epilogue_size_needed);
14976 if (jump_around_label)
14977 emit_label (jump_around_label);
14981 /* Helper function for memset expansion (via promote_duplicated_reg_to_size).
14982 For QImode value 0xXY produce
14983 0xXYXYXYXY of wide specified by MODE. This is essentially
14984 a * 0x10101010, but we can do slightly better than
synth_mult by unwinding the sequence by hand on CPUs with
14987 promote_duplicated_reg (enum machine_mode mode, rtx val)
14989 enum machine_mode valmode = GET_MODE (val);
/* Number of shift/or steps needed for the hand-unwound sequence.  */
14991 int nops = mode == DImode ? 3 : 2;
14993 gcc_assert (mode == SImode || mode == DImode);
14994 if (val == const0_rtx)
14995 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: compute the replicated pattern at compile time.  */
14996 if (CONST_INT_P (val))
14998 HOST_WIDE_INT v = INTVAL (val) & 255;
15002 if (mode == DImode)
15003 v |= (v << 16) << 16;
15004 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15007 if (valmode == VOIDmode)
15009 if (valmode != QImode)
15010 val = gen_lowpart (QImode, val);
15011 if (mode == QImode)
15013 if (!TARGET_PARTIAL_REG_STALL)
/* If multiplying by 0x01010101 is cheaper than the shift/or sequence on
this CPU (per ix86_cost), use the multiply.  */
15015 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15016 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15017 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15018 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15020 rtx reg = convert_modes (mode, QImode, val, true);
15021 tmp = promote_duplicated_reg (mode, const1_rtx);
15022 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Otherwise replicate the byte by successive shift-and-or doubling.  */
15027 rtx reg = convert_modes (mode, QImode, val, true);
15029 if (!TARGET_PARTIAL_REG_STALL)
15030 if (mode == SImode)
15031 emit_insn (gen_movsi_insv_1 (reg, reg));
15033 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15036 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15037 NULL, 1, OPTAB_DIRECT);
15039 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15041 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15042 NULL, 1, OPTAB_DIRECT);
15043 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15044 if (mode == SImode)
15046 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15047 NULL, 1, OPTAB_DIRECT);
15048 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15053 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15054 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15055 alignment from ALIGN to DESIRED_ALIGN. */
15057 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
/* Pick the widest replication width that either the main loop chunk or
the alignment prologue will store.  */
15062 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15063 promoted_val = promote_duplicated_reg (DImode, val);
15064 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15065 promoted_val = promote_duplicated_reg (SImode, val);
15066 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15067 promoted_val = promote_duplicated_reg (HImode, val);
15069 promoted_val = val;
15071 return promoted_val;
15074 /* Expand string clear operation (bzero). Use i386 string operations when
15075 profitable. See expand_movmem comment for explanation of individual
15076 steps performed. */
/* NOTE(review): several interior lines of this function are elided in this
   extract (switch skeletons, braces, else arms); comments below describe only
   the visible statements.  */
15078 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15079 rtx expected_align_exp, rtx expected_size_exp)
15084 rtx jump_around_label = NULL;
15085 HOST_WIDE_INT align = 1;
15086 unsigned HOST_WIDE_INT count = 0;
15087 HOST_WIDE_INT expected_size = -1;
15088 int size_needed = 0, epilogue_size_needed;
15089 int desired_align = 0;
15090 enum stringop_alg alg;
15091 rtx promoted_val = NULL;
15092 bool force_loopy_epilogue = false;
/* Extract compile-time constants from the hint operands when available.  */
15095 if (CONST_INT_P (align_exp))
15096 align = INTVAL (align_exp);
15097 /* i386 can do misaligned access on reasonably increased cost. */
15098 if (CONST_INT_P (expected_align_exp)
15099 && INTVAL (expected_align_exp) > align)
15100 align = INTVAL (expected_align_exp);
15101 if (CONST_INT_P (count_exp))
15102 count = expected_size = INTVAL (count_exp);
15103 if (CONST_INT_P (expected_size_exp) && count == 0)
15104 expected_size = INTVAL (expected_size_exp);
15106 /* Step 0: Decide on preferred algorithm, desired alignment and
15107 size of chunks to be copied by main loop. */
15109 alg = decide_alg (count, expected_size, true, &dynamic_check);
15110 desired_align = decide_alignment (align, alg, expected_size);
15112 if (!TARGET_ALIGN_STRINGOPS)
15113 align = desired_align;
15115 if (alg == libcall)
15117 gcc_assert (alg != no_stringop);
15119 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15120 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15125 gcc_unreachable ();
/* SIZE_NEEDED is the chunk size the main loop stores per iteration.  */
15127 size_needed = GET_MODE_SIZE (Pmode);
15129 case unrolled_loop:
15130 size_needed = GET_MODE_SIZE (Pmode) * 4;
15132 case rep_prefix_8_byte:
15135 case rep_prefix_4_byte:
15138 case rep_prefix_1_byte:
15143 epilogue_size_needed = size_needed;
15145 /* Step 1: Prologue guard. */
15147 /* Alignment code needs count to be in register. */
15148 if (CONST_INT_P (count_exp) && desired_align > align)
15150 enum machine_mode mode = SImode;
15151 if (TARGET_64BIT && (count & ~0xffffffff))
15153 count_exp = force_reg (mode, count_exp);
15155 /* Do the cheap promotion to allow better CSE across the
15156 main loop and epilogue (ie one load of the big constant in the
15157 front of all code. */
15158 if (CONST_INT_P (val_exp))
15159 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15160 desired_align, align);
15161 /* Ensure that alignment prologue won't copy past end of block. */
15162 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15164 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15165 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15166 Make sure it is power of 2. */
15167 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15169 /* To improve performance of small blocks, we jump around the VAL
15170 promoting mode. This means that if the promoted VAL is not constant,
15171 we might not use it in the epilogue and have to use byte
15173 if (epilogue_size_needed > 2 && !promoted_val)
15174 force_loopy_epilogue = true;
15175 label = gen_label_rtx ();
15176 emit_cmp_and_jump_insns (count_exp,
15177 GEN_INT (epilogue_size_needed),
15178 LTU, 0, counter_mode (count_exp), 1, label);
/* Attach branch-probability notes so the scheduler treats the small-block
   path as the cold one when a size hint is available.  */
15179 if (GET_CODE (count_exp) == CONST_INT)
15181 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15182 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15184 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* DYNAMIC_CHECK != -1 means small sizes should fall back to the library
   call at run time.  */
15186 if (dynamic_check != -1)
15188 rtx hot_label = gen_label_rtx ();
15189 jump_around_label = gen_label_rtx ();
15190 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15191 LEU, 0, counter_mode (count_exp), 1, hot_label);
15192 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15193 set_storage_via_libcall (dst, count_exp, val_exp, false);
15194 emit_jump (jump_around_label);
15195 emit_label (hot_label);
15198 /* Step 2: Alignment prologue. */
15200 /* Do the expensive promotion once we branched off the small blocks. */
15202 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15203 desired_align, align);
15204 gcc_assert (desired_align >= 1 && align >= 1);
15206 if (desired_align > align)
15208 /* Except for the first move in epilogue, we no longer know
15209 constant offset in aliasing info. It doesn't seem worth
15210 the pain to maintain it for the first move, so throw away
15212 dst = change_address (dst, BLKmode, destreg);
15213 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15216 if (label && size_needed == 1)
15218 emit_label (label);
15219 LABEL_NUSES (label) = 1;
15223 /* Step 3: Main loop. */
15229 gcc_unreachable ();
15231 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15232 count_exp, QImode, 1, expected_size);
15235 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15236 count_exp, Pmode, 1, expected_size);
15238 case unrolled_loop:
15239 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15240 count_exp, Pmode, 4, expected_size);
15242 case rep_prefix_8_byte:
15243 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15246 case rep_prefix_4_byte:
15247 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15250 case rep_prefix_1_byte:
15251 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15255 /* Adjust properly the offset of src and dest memory for aliasing. */
15256 if (CONST_INT_P (count_exp))
15257 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15258 (count / size_needed) * size_needed);
15260 dst = change_address (dst, BLKmode, destreg);
15262 /* Step 4: Epilogue to copy the remaining bytes. */
15266 /* When the main loop is done, COUNT_EXP might hold original count,
15267 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15268 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15269 bytes. Compensate if needed. */
15271 if (size_needed < desired_align - align)
15274 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15275 GEN_INT (size_needed - 1), count_exp, 1,
15277 size_needed = desired_align - align + 1;
15278 if (tmp != count_exp)
15279 emit_move_insn (count_exp, tmp);
15281 emit_label (label);
15282 LABEL_NUSES (label) = 1;
15284 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15286 if (force_loopy_epilogue)
15287 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15290 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15293 if (jump_around_label)
15294 emit_label (jump_around_label);
15298 /* Expand the appropriate insns for doing strlen if not just doing
15301 out = result, initialized with the start address
15302 align_rtx = alignment of the address.
15303 scratch = scratch register, initialized with the startaddress when
15304 not aligned, otherwise undefined
15306 This is just the body. It needs the initializations mentioned above and
15307 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): some interior lines (braces, else arms, jump targets) are
   elided in this extract.  */
15310 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15314 rtx align_2_label = NULL_RTX;
15315 rtx align_3_label = NULL_RTX;
15316 rtx align_4_label = gen_label_rtx ();
15317 rtx end_0_label = gen_label_rtx ();
15319 rtx tmpreg = gen_reg_rtx (SImode);
15320 rtx scratch = gen_reg_rtx (SImode);
15324 if (CONST_INT_P (align_rtx))
15325 align = INTVAL (align_rtx);
15327 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15329 /* Is there a known alignment and is it less than 4? */
15332 rtx scratch1 = gen_reg_rtx (Pmode);
15333 emit_move_insn (scratch1, out);
15334 /* Is there a known alignment and is it not 2? */
15337 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15338 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15340 /* Leave just the 3 lower bits. */
15341 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15342 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> fully aligned, 2 -> 2-aligned,
   3 -> 3-aligned, otherwise fall through to the 1-aligned path.  */
15344 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15345 Pmode, 1, align_4_label);
15346 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15347 Pmode, 1, align_2_label);
15348 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15349 Pmode, 1, align_3_label);
15353 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15354 check if is aligned to 4 - byte. */
15356 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15357 NULL_RTX, 0, OPTAB_WIDEN);
15359 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15360 Pmode, 1, align_4_label);
15363 mem = change_address (src, QImode, out);
15365 /* Now compare the bytes. */
15367 /* Compare the first n unaligned byte on a byte per byte basis. */
15368 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15369 QImode, 1, end_0_label);
15371 /* Increment the address. */
15373 emit_insn (gen_adddi3 (out, out, const1_rtx));
15375 emit_insn (gen_addsi3 (out, out, const1_rtx));
15377 /* Not needed with an alignment of 2 */
15380 emit_label (align_2_label);
15382 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15386 emit_insn (gen_adddi3 (out, out, const1_rtx));
15388 emit_insn (gen_addsi3 (out, out, const1_rtx));
15390 emit_label (align_3_label);
15393 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15397 emit_insn (gen_adddi3 (out, out, const1_rtx));
15399 emit_insn (gen_addsi3 (out, out, const1_rtx));
15402 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15403 align this loop. It gives only huge programs, but does not help to
15405 emit_label (align_4_label);
15407 mem = change_address (src, SImode, out);
15408 emit_move_insn (scratch, mem);
15410 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15412 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15414 /* This formula yields a nonzero result iff one of the bytes is zero.
15415 This saves three branches inside loop and many cycles. */
/* Classic "has-zero-byte" trick: (x - 0x01010101) & ~x & 0x80808080.  */
15417 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15418 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15419 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15420 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15421 gen_int_mode (0x80808080, SImode)));
15422 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15427 rtx reg = gen_reg_rtx (SImode);
15428 rtx reg2 = gen_reg_rtx (Pmode);
15429 emit_move_insn (reg, tmpreg);
15430 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15432 /* If zero is not in the first two bytes, move two bytes forward. */
15433 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15434 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15435 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15436 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15437 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15440 /* Emit lea manually to avoid clobbering of flags. */
15441 emit_insn (gen_rtx_SET (SImode, reg2,
15442 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15444 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15445 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15446 emit_insn (gen_rtx_SET (VOIDmode, out,
15447 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15454 rtx end_2_label = gen_label_rtx ();
15455 /* Is zero in the first two bytes? */
15457 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15458 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15459 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15460 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15461 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15463 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15464 JUMP_LABEL (tmp) = end_2_label;
15466 /* Not in the first two. Move two bytes forward. */
15467 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15469 emit_insn (gen_adddi3 (out, out, const2_rtx));
15471 emit_insn (gen_addsi3 (out, out, const2_rtx));
15473 emit_label (end_2_label);
15477 /* Avoid branch in fixing the byte. */
15478 tmpreg = gen_lowpart (QImode, tmpreg);
15479 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15480 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15482 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15484 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15486 emit_label (end_0_label);
15489 /* Expand strlen. */
/* OUT receives the length; SRC is the string MEM; EOSCHAR is the terminator
   (only const0_rtx enables the unrolled path); ALIGN is an alignment hint.  */
15492 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15494 rtx addr, scratch1, scratch2, scratch3, scratch4;
15496 /* The generic case of strlen expander is long. Avoid its
15497 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
15499 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15500 && !TARGET_INLINE_ALL_STRINGOPS
15502 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15505 addr = force_reg (Pmode, XEXP (src, 0));
15506 scratch1 = gen_reg_rtx (Pmode);
15508 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15511 /* Well it seems that some optimizer does not combine a call like
15512 foo(strlen(bar), strlen(bar));
15513 when the move and the subtraction is done here. It does calculate
15514 the length just once when these instructions are done inside of
15515 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15516 often used and I use one fewer register for the lifetime of
15517 output_strlen_unroll() this is better. */
15519 emit_move_insn (out, addr);
15521 ix86_expand_strlensi_unroll_1 (out, src, align);
15523 /* strlensi_unroll_1 returns the address of the zero at the end of
15524 the string, like memchr(), so compute the length by subtracting
15525 the start address. */
15527 emit_insn (gen_subdi3 (out, out, addr));
15529 emit_insn (gen_subsi3 (out, out, addr));
/* Otherwise use the repnz scasb sequence via the strlenqi_1 pattern.  */
15534 scratch2 = gen_reg_rtx (Pmode);
15535 scratch3 = gen_reg_rtx (Pmode);
15536 scratch4 = force_reg (Pmode, constm1_rtx);
15538 emit_move_insn (scratch3, addr);
15539 eoschar = force_reg (QImode, eoschar);
15541 src = replace_equiv_address_nv (src, scratch3);
15543 /* If .md starts supporting :P, this can be done in .md. */
15544 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15545 scratch4), UNSPEC_SCAS);
15546 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves the negated count plus terminator; out = ~scratch1 - 1.  */
15549 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15550 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15554 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15555 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15561 /* For given symbol (function) construct code to compute address of its PLT
15562 entry in large x86-64 PIC model. */
/* Emits: tmp = PLTOFF(symbol) + PIC register; only valid for CM_LARGE_PIC.  */
15564 construct_plt_address (rtx symbol)
15566 rtx tmp = gen_reg_rtx (Pmode);
15567 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15569 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15570 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15572 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15573 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a (sib)call to FNADDR returning RETVAL.  CALLARG1 is the argument
   usage rtx; CALLARG2, when a nonnegative CONST_INT on 64-bit, is the SSE
   register count passed in AL for varargs; POP is the bytes the callee pops.
   NOTE(review): several interior lines are elided in this extract.  */
15578 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15579 rtx callarg2 ATTRIBUTE_UNUSED,
15580 rtx pop, int sibcall)
15582 rtx use = NULL, call;
15584 if (pop == const0_rtx)
15586 gcc_assert (!TARGET_64BIT || !pop);
15588 if (TARGET_MACHO && !TARGET_64BIT)
15591 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15592 fnaddr = machopic_indirect_call_target (fnaddr);
15597 /* Static functions and indirect calls don't need the pic register. */
15598 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15599 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15600 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15601 use_reg (&use, pic_offset_table_rtx)
15604 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15606 rtx al = gen_rtx_REG (QImode, 0);
15607 emit_move_insn (al, callarg2);
15608 use_reg (&use, al);
/* Large PIC model: route external symbol calls through a computed PLT
   address.  */
15611 if (ix86_cmodel == CM_LARGE_PIC
15612 && GET_CODE (fnaddr) == MEM
15613 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15614 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15615 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15616 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15618 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15619 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls to non-constant addresses must go through R11, a
   call-clobbered register not used for argument passing.  */
15621 if (sibcall && TARGET_64BIT
15622 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15625 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15626 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15627 emit_move_insn (fnaddr, addr);
15628 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15631 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15633 call = gen_rtx_SET (VOIDmode, retval, call);
15636 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15637 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15638 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15641 call = emit_call_insn (call);
15643 CALL_INSN_FUNCTION_USAGE (call) = use;
15647 /* Clear stack slot assignments remembered from previous functions.
15648 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and zero a fresh per-function machine_function record.  */
15651 static struct machine_function *
15652 ix86_init_machine_status (void)
15654 struct machine_function *f;
15656 f = GGC_CNEW (struct machine_function);
/* -1 means "not yet computed" for the fast prologue/epilogue heuristic.  */
15657 f->use_fast_prologue_epilogue_nregs = -1;
15658 f->tls_descriptor_call_expanded_p = 0;
15663 /* Return a MEM corresponding to a stack slot with mode MODE.
15664 Allocate a new slot if necessary.
15666 The RTL for a function can have several slots available: N is
15667 which slot to use. */
15670 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15672 struct stack_local_entry *s;
15674 gcc_assert (n < MAX_386_STACK_LOCALS);
15676 /* Virtual slot is valid only before vregs are instantiated. */
15677 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse an existing slot with the same (mode, n) key if present.  */
15679 for (s = ix86_stack_locals; s; s = s->next)
15680 if (s->mode == mode && s->n == n)
15681 return copy_rtx (s->rtl);
15683 s = (struct stack_local_entry *)
15684 ggc_alloc (sizeof (struct stack_local_entry));
15687 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Push the new entry onto the per-function list.  */
15689 s->next = ix86_stack_locals;
15690 ix86_stack_locals = s;
15694 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15696 static GTY(()) rtx ix86_tls_symbol;
/* Lazily create and cache the SYMBOL_REF; GNU TLS uses the triple-underscore
   entry point.  */
15698 ix86_tls_get_addr (void)
15701 if (!ix86_tls_symbol)
15703 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15704 (TARGET_ANY_GNU_TLS
15706 ? "___tls_get_addr"
15707 : "__tls_get_addr");
15710 return ix86_tls_symbol;
15713 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15715 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily create and cache the symbol, marking it global-dynamic TLS.  */
15717 ix86_tls_module_base (void)
15720 if (!ix86_tls_module_base_symbol)
15722 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15723 "_TLS_MODULE_BASE_");
15724 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15725 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15728 return ix86_tls_module_base_symbol;
15731 /* Calculate the length of the memory address in the instruction
15732 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): some interior lines (len accumulation, returns) are elided in
   this extract; the visible cases follow x86 ModRM/SIB encoding rules.  */
15735 memory_address_length (rtx addr)
15737 struct ix86_address parts;
15738 rtx base, index, disp;
15742 if (GET_CODE (addr) == PRE_DEC
15743 || GET_CODE (addr) == POST_INC
15744 || GET_CODE (addr) == PRE_MODIFY
15745 || GET_CODE (addr) == POST_MODIFY)
15748 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so register identity checks below see the hard regs.  */
15751 if (parts.base && GET_CODE (parts.base) == SUBREG)
15752 parts.base = SUBREG_REG (parts.base);
15753 if (parts.index && GET_CODE (parts.index) == SUBREG)
15754 parts.index = SUBREG_REG (parts.index);
15757 index = parts.index;
15762 - esp as the base always wants an index,
15763 - ebp as the base always wants a displacement. */
15765 /* Register Indirect. */
15766 if (base && !index && !disp)
15768 /* esp (for its index) and ebp (for its displacement) need
15769 the two-byte modrm form. */
15770 if (addr == stack_pointer_rtx
15771 || addr == arg_pointer_rtx
15772 || addr == frame_pointer_rtx
15773 || addr == hard_frame_pointer_rtx)
15777 /* Direct Addressing. */
15778 else if (disp && !base && !index)
15783 /* Find the length of the displacement constant. */
15786 if (base && satisfies_constraint_K (disp))
15791 /* ebp always wants a displacement. */
15792 else if (base == hard_frame_pointer_rtx)
15795 /* An index requires the two-byte modrm form.... */
15797 /* ...like esp, which always wants an index. */
15798 || base == stack_pointer_rtx
15799 || base == arg_pointer_rtx
15800 || base == frame_pointer_rtx)
15807 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15808 is set, expect that insn have 8bit immediate alternative. */
15810 ix86_attr_length_immediate_default (rtx insn, int shortform)
/* Scan operands for a constant; constraint K means it fits in a signed
   8-bit immediate, so the short encoding applies.  */
15814 extract_insn_cached (insn);
15815 for (i = recog_data.n_operands - 1; i >= 0; --i)
15816 if (CONSTANT_P (recog_data.operand[i]))
15819 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15823 switch (get_attr_mode (insn))
15834 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15839 fatal_insn ("unknown insn mode", insn);
15845 /* Compute default value for "length_address" attribute. */
15847 ix86_attr_length_address_default (rtx insn)
/* For LEA the "address" is the SET_SRC expression itself, not a MEM.  */
15851 if (get_attr_type (insn) == TYPE_LEA)
15853 rtx set = PATTERN (insn);
15855 if (GET_CODE (set) == PARALLEL)
15856 set = XVECEXP (set, 0, 0);
15858 gcc_assert (GET_CODE (set) == SET);
15860 return memory_address_length (SET_SRC (set));
/* Otherwise measure the first memory operand's address, if any.  */
15863 extract_insn_cached (insn);
15864 for (i = recog_data.n_operands - 1; i >= 0; --i)
15865 if (MEM_P (recog_data.operand[i]))
15867 return memory_address_length (XEXP (recog_data.operand[i], 0));
15873 /* Return the maximum number of instructions a cpu can issue. */
/* Dispatches on ix86_tune; the per-case return values are elided in this
   extract.  */
15876 ix86_issue_rate (void)
15880 case PROCESSOR_PENTIUM:
15884 case PROCESSOR_PENTIUMPRO:
15885 case PROCESSOR_PENTIUM4:
15886 case PROCESSOR_ATHLON:
15888 case PROCESSOR_AMDFAM10:
15889 case PROCESSOR_NOCONA:
15890 case PROCESSOR_GENERIC32:
15891 case PROCESSOR_GENERIC64:
15894 case PROCESSOR_CORE2:
15902 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15903 by DEP_INSN and nothing set by DEP_INSN. */
15906 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15910 /* Simplify the test for uninteresting insns. */
15911 if (insn_type != TYPE_SETCC
15912 && insn_type != TYPE_ICMOV
15913 && insn_type != TYPE_FCMOV
15914 && insn_type != TYPE_IBR)
15917 if ((set = single_set (dep_insn)) != 0)
15919 set = SET_DEST (set);
15922 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15923 && XVECLEN (PATTERN (dep_insn), 0) == 2
15924 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15925 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15927 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15928 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15933 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15936 /* This test is true if the dependent insn reads the flags but
15937 not any other potentially set register. */
15938 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15941 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15947 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15948 address with operands set by DEP_INSN. */
15951 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* For LEA the address is the SET_SRC of the pattern itself.  */
15955 if (insn_type == TYPE_LEA
15958 addr = PATTERN (insn);
15960 if (GET_CODE (addr) == PARALLEL)
15961 addr = XVECEXP (addr, 0, 0);
15963 gcc_assert (GET_CODE (addr) == SET);
15965 addr = SET_SRC (addr);
/* Otherwise find the first MEM operand and take its address.  */
15970 extract_insn_cached (insn);
15971 for (i = recog_data.n_operands - 1; i >= 0; --i)
15972 if (MEM_P (recog_data.operand[i]))
15974 addr = XEXP (recog_data.operand[i], 0);
/* AGI stall exists when DEP_INSN writes any register used in ADDR.  */
15981 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependency LINK between
   DEP_INSN (producer) and INSN (consumer) for the tuned processor.
   NOTE(review): several interior lines (cost updates, breaks) are elided in
   this extract.  */
15985 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15987 enum attr_type insn_type, dep_insn_type;
15988 enum attr_memory memory;
15990 int dep_insn_code_number;
15992 /* Anti and output dependencies have zero cost on all CPUs. */
15993 if (REG_NOTE_KIND (link) != 0)
15996 dep_insn_code_number = recog_memoized (dep_insn);
15998 /* If we can't recognize the insns, we can't really do anything. */
15999 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16002 insn_type = get_attr_type (insn);
16003 dep_insn_type = get_attr_type (dep_insn);
16007 case PROCESSOR_PENTIUM:
16008 /* Address Generation Interlock adds a cycle of latency. */
16009 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16012 /* ??? Compares pair with jump/setcc. */
16013 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16016 /* Floating point stores require value to be ready one cycle earlier. */
16017 if (insn_type == TYPE_FMOV
16018 && get_attr_memory (insn) == MEMORY_STORE
16019 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16023 case PROCESSOR_PENTIUMPRO:
16024 memory = get_attr_memory (insn);
16026 /* INT->FP conversion is expensive. */
16027 if (get_attr_fp_int_src (dep_insn))
16030 /* There is one cycle extra latency between an FP op and a store. */
16031 if (insn_type == TYPE_FMOV
16032 && (set = single_set (dep_insn)) != NULL_RTX
16033 && (set2 = single_set (insn)) != NULL_RTX
16034 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16035 && MEM_P (SET_DEST (set2)))
16038 /* Show ability of reorder buffer to hide latency of load by executing
16039 in parallel with previous instruction in case
16040 previous instruction is not needed to compute the address. */
16041 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16042 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16044 /* Claim moves to take one cycle, as core can issue one load
16045 at time and the next load can start cycle later. */
16046 if (dep_insn_type == TYPE_IMOV
16047 || dep_insn_type == TYPE_FMOV)
16055 memory = get_attr_memory (insn);
16057 /* The esp dependency is resolved before the instruction is really
16059 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16060 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16063 /* INT->FP conversion is expensive. */
16064 if (get_attr_fp_int_src (dep_insn))
16067 /* Show ability of reorder buffer to hide latency of load by executing
16068 in parallel with previous instruction in case
16069 previous instruction is not needed to compute the address. */
16070 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16071 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16073 /* Claim moves to take one cycle, as core can issue one load
16074 at time and the next load can start cycle later. */
16075 if (dep_insn_type == TYPE_IMOV
16076 || dep_insn_type == TYPE_FMOV)
16085 case PROCESSOR_ATHLON:
16087 case PROCESSOR_AMDFAM10:
16088 case PROCESSOR_GENERIC32:
16089 case PROCESSOR_GENERIC64:
16090 memory = get_attr_memory (insn);
16092 /* Show ability of reorder buffer to hide latency of load by executing
16093 in parallel with previous instruction in case
16094 previous instruction is not needed to compute the address. */
16095 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16096 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16098 enum attr_unit unit = get_attr_unit (insn);
16101 /* Because of the difference between the length of integer and
16102 floating unit pipeline preparation stages, the memory operands
16103 for floating point are cheaper.
16105 ??? For Athlon the difference is most probably 2. */
16106 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16109 loadcost = TARGET_ATHLON ? 2 : 0;
16111 if (cost >= loadcost)
16124 /* How many alternative schedules to try. This should be as wide as the
16125 scheduling freedom in the DFA, but no wider. Making this value too
16126 large results extra work for the scheduler. */
16129 ia32_multipass_dfa_lookahead (void)
/* Per-CPU return values are elided in this extract; the dispatch keys on
   ix86_tune.  */
16131 if (ix86_tune == PROCESSOR_PENTIUM)
16134 if (ix86_tune == PROCESSOR_PENTIUMPRO
16135 || ix86_tune == PROCESSOR_K6)
16143 /* Compute the alignment given to a constant that is being placed in memory.
16144 EXP is the constant and ALIGN is the alignment that the object would
16146 The value of this function is used instead of that alignment to align
16150 ix86_constant_alignment (tree exp, int align)
/* Widen doubles to 64 bits and 128-bit-mode constants to 128 bits; long
   string constants get word alignment when not optimizing for size.  */
16152 if (TREE_CODE (exp) == REAL_CST)
16154 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16156 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16159 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16160 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16161 return BITS_PER_WORD;
16166 /* Compute the alignment for a static variable.
16167 TYPE is the data type, and ALIGN is the alignment that
16168 the object would ordinarily have. The value of this function is used
16169 instead of that alignment to align the object. */
16172 ix86_data_alignment (tree type, int align)
16174 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large constant-size aggregates get the capped maximum alignment.  */
16176 if (AGGREGATE_TYPE_P (type)
16177 && TYPE_SIZE (type)
16178 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16179 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16180 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16181 && align < max_align)
16184 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16185 to 16byte boundary. */
16188 if (AGGREGATE_TYPE_P (type)
16189 && TYPE_SIZE (type)
16190 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16191 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16192 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Below: widen by element/field mode -- DFmode data to 64 bits,
   128-bit modes to 128 bits.  */
16196 if (TREE_CODE (type) == ARRAY_TYPE)
16198 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16200 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16203 else if (TREE_CODE (type) == COMPLEX_TYPE)
16206 if (TYPE_MODE (type) == DCmode && align < 64)
16208 if (TYPE_MODE (type) == XCmode && align < 128)
16211 else if ((TREE_CODE (type) == RECORD_TYPE
16212 || TREE_CODE (type) == UNION_TYPE
16213 || TREE_CODE (type) == QUAL_UNION_TYPE)
16214 && TYPE_FIELDS (type))
16216 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16218 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16221 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16222 || TREE_CODE (type) == INTEGER_TYPE)
16224 if (TYPE_MODE (type) == DFmode && align < 64)
16226 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16233 /* Compute the alignment for a local variable.
16234 TYPE is the data type, and ALIGN is the alignment that
16235 the object would ordinarily have. The value of this macro is used
16236 instead of that alignment to align the object. */
16239 ix86_local_alignment (tree type, int align)
16241 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16242 to 16byte boundary. */
16245 if (AGGREGATE_TYPE_P (type)
16246 && TYPE_SIZE (type)
16247 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16248 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16249 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Below: same mode-based widening scheme as ix86_data_alignment.  */
16252 if (TREE_CODE (type) == ARRAY_TYPE)
16254 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16256 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16259 else if (TREE_CODE (type) == COMPLEX_TYPE)
16261 if (TYPE_MODE (type) == DCmode && align < 64)
16263 if (TYPE_MODE (type) == XCmode && align < 128)
16266 else if ((TREE_CODE (type) == RECORD_TYPE
16267 || TREE_CODE (type) == UNION_TYPE
16268 || TREE_CODE (type) == QUAL_UNION_TYPE)
16269 && TYPE_FIELDS (type))
16271 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16273 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16276 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16277 || TREE_CODE (type) == INTEGER_TYPE)
16280 if (TYPE_MODE (type) == DFmode && align < 64)
16282 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16288 /* Emit RTL insns to initialize the variable parts of a trampoline.
16289 FNADDR is an RTX for the address of the function's pure code.
16290 CXT is an RTX for the static chain value for the function. */
16292 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit layout: "movl $cxt, %ecx" (0xb9 imm32) then "jmp rel32"
   (0xe9 rel32) -- 10 bytes total, hence the plus_constant (tramp, 10).  */
16296 /* Compute offset from the end of the jmp to the target function. */
16297 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16298 plus_constant (tramp, 10),
16299 NULL_RTX, 1, OPTAB_DIRECT);
16300 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16301 gen_int_mode (0xb9, QImode));
16302 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16303 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16304 gen_int_mode (0xe9, QImode));
16305 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit layout below: load FNADDR into r11 (short movl when it zero-extends,
   otherwise movabs), movabs CXT into r10, then "jmp *%r11".  */
16310 /* Try to load address using shorter movl instead of movabs.
16311 We may want to support movq for kernel mode, but kernel does not use
16312 trampolines at the moment. */
16313 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16315 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16316 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16317 gen_int_mode (0xbb41, HImode));
16318 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16319 gen_lowpart (SImode, fnaddr));
16324 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16325 gen_int_mode (0xbb49, HImode));
16326 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16330 /* Load static chain using movabs to r10. */
16331 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16332 gen_int_mode (0xba49, HImode));
16333 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16336 /* Jump to the r11 */
16337 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16338 gen_int_mode (0xff49, HImode));
16339 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16340 gen_int_mode (0xe3, QImode));
16342 gcc_assert (offset <= TRAMPOLINE_SIZE);
16345 #ifdef ENABLE_EXECUTE_STACK
/* Some platforms need an explicit syscall/libcall to make the stack page
   executable.  */
16346 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16347 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16351 /* Codes for all the SSE/MMX builtins. */
/* NOTE(review): the `enum ix86_builtins' opening line and the closing
   IX86_BUILTIN_MAX sentinel (referenced by the ix86_builtins[] table
   below) are not visible in this excerpt.  Each enumerator is the key
   under which def_builtin stores the builtin's decl.  */
/* SSE builtins.  */
16354 IX86_BUILTIN_ADDPS,
16355 IX86_BUILTIN_ADDSS,
16356 IX86_BUILTIN_DIVPS,
16357 IX86_BUILTIN_DIVSS,
16358 IX86_BUILTIN_MULPS,
16359 IX86_BUILTIN_MULSS,
16360 IX86_BUILTIN_SUBPS,
16361 IX86_BUILTIN_SUBSS,
16363 IX86_BUILTIN_CMPEQPS,
16364 IX86_BUILTIN_CMPLTPS,
16365 IX86_BUILTIN_CMPLEPS,
16366 IX86_BUILTIN_CMPGTPS,
16367 IX86_BUILTIN_CMPGEPS,
16368 IX86_BUILTIN_CMPNEQPS,
16369 IX86_BUILTIN_CMPNLTPS,
16370 IX86_BUILTIN_CMPNLEPS,
16371 IX86_BUILTIN_CMPNGTPS,
16372 IX86_BUILTIN_CMPNGEPS,
16373 IX86_BUILTIN_CMPORDPS,
16374 IX86_BUILTIN_CMPUNORDPS,
16375 IX86_BUILTIN_CMPEQSS,
16376 IX86_BUILTIN_CMPLTSS,
16377 IX86_BUILTIN_CMPLESS,
16378 IX86_BUILTIN_CMPNEQSS,
16379 IX86_BUILTIN_CMPNLTSS,
16380 IX86_BUILTIN_CMPNLESS,
16381 IX86_BUILTIN_CMPNGTSS,
16382 IX86_BUILTIN_CMPNGESS,
16383 IX86_BUILTIN_CMPORDSS,
16384 IX86_BUILTIN_CMPUNORDSS,
16386 IX86_BUILTIN_COMIEQSS,
16387 IX86_BUILTIN_COMILTSS,
16388 IX86_BUILTIN_COMILESS,
16389 IX86_BUILTIN_COMIGTSS,
16390 IX86_BUILTIN_COMIGESS,
16391 IX86_BUILTIN_COMINEQSS,
16392 IX86_BUILTIN_UCOMIEQSS,
16393 IX86_BUILTIN_UCOMILTSS,
16394 IX86_BUILTIN_UCOMILESS,
16395 IX86_BUILTIN_UCOMIGTSS,
16396 IX86_BUILTIN_UCOMIGESS,
16397 IX86_BUILTIN_UCOMINEQSS,
16399 IX86_BUILTIN_CVTPI2PS,
16400 IX86_BUILTIN_CVTPS2PI,
16401 IX86_BUILTIN_CVTSI2SS,
16402 IX86_BUILTIN_CVTSI642SS,
16403 IX86_BUILTIN_CVTSS2SI,
16404 IX86_BUILTIN_CVTSS2SI64,
16405 IX86_BUILTIN_CVTTPS2PI,
16406 IX86_BUILTIN_CVTTSS2SI,
16407 IX86_BUILTIN_CVTTSS2SI64,
16409 IX86_BUILTIN_MAXPS,
16410 IX86_BUILTIN_MAXSS,
16411 IX86_BUILTIN_MINPS,
16412 IX86_BUILTIN_MINSS,
16414 IX86_BUILTIN_LOADUPS,
16415 IX86_BUILTIN_STOREUPS,
16416 IX86_BUILTIN_MOVSS,
16418 IX86_BUILTIN_MOVHLPS,
16419 IX86_BUILTIN_MOVLHPS,
16420 IX86_BUILTIN_LOADHPS,
16421 IX86_BUILTIN_LOADLPS,
16422 IX86_BUILTIN_STOREHPS,
16423 IX86_BUILTIN_STORELPS,
16425 IX86_BUILTIN_MASKMOVQ,
16426 IX86_BUILTIN_MOVMSKPS,
16427 IX86_BUILTIN_PMOVMSKB,
16429 IX86_BUILTIN_MOVNTPS,
16430 IX86_BUILTIN_MOVNTQ,
16432 IX86_BUILTIN_LOADDQU,
16433 IX86_BUILTIN_STOREDQU,
/* MMX (and MMX-with-SSE-extension) builtins.  */
16435 IX86_BUILTIN_PACKSSWB,
16436 IX86_BUILTIN_PACKSSDW,
16437 IX86_BUILTIN_PACKUSWB,
16439 IX86_BUILTIN_PADDB,
16440 IX86_BUILTIN_PADDW,
16441 IX86_BUILTIN_PADDD,
16442 IX86_BUILTIN_PADDQ,
16443 IX86_BUILTIN_PADDSB,
16444 IX86_BUILTIN_PADDSW,
16445 IX86_BUILTIN_PADDUSB,
16446 IX86_BUILTIN_PADDUSW,
16447 IX86_BUILTIN_PSUBB,
16448 IX86_BUILTIN_PSUBW,
16449 IX86_BUILTIN_PSUBD,
16450 IX86_BUILTIN_PSUBQ,
16451 IX86_BUILTIN_PSUBSB,
16452 IX86_BUILTIN_PSUBSW,
16453 IX86_BUILTIN_PSUBUSB,
16454 IX86_BUILTIN_PSUBUSW,
16457 IX86_BUILTIN_PANDN,
16461 IX86_BUILTIN_PAVGB,
16462 IX86_BUILTIN_PAVGW,
16464 IX86_BUILTIN_PCMPEQB,
16465 IX86_BUILTIN_PCMPEQW,
16466 IX86_BUILTIN_PCMPEQD,
16467 IX86_BUILTIN_PCMPGTB,
16468 IX86_BUILTIN_PCMPGTW,
16469 IX86_BUILTIN_PCMPGTD,
16471 IX86_BUILTIN_PMADDWD,
16473 IX86_BUILTIN_PMAXSW,
16474 IX86_BUILTIN_PMAXUB,
16475 IX86_BUILTIN_PMINSW,
16476 IX86_BUILTIN_PMINUB,
16478 IX86_BUILTIN_PMULHUW,
16479 IX86_BUILTIN_PMULHW,
16480 IX86_BUILTIN_PMULLW,
16482 IX86_BUILTIN_PSADBW,
16483 IX86_BUILTIN_PSHUFW,
16485 IX86_BUILTIN_PSLLW,
16486 IX86_BUILTIN_PSLLD,
16487 IX86_BUILTIN_PSLLQ,
16488 IX86_BUILTIN_PSRAW,
16489 IX86_BUILTIN_PSRAD,
16490 IX86_BUILTIN_PSRLW,
16491 IX86_BUILTIN_PSRLD,
16492 IX86_BUILTIN_PSRLQ,
16493 IX86_BUILTIN_PSLLWI,
16494 IX86_BUILTIN_PSLLDI,
16495 IX86_BUILTIN_PSLLQI,
16496 IX86_BUILTIN_PSRAWI,
16497 IX86_BUILTIN_PSRADI,
16498 IX86_BUILTIN_PSRLWI,
16499 IX86_BUILTIN_PSRLDI,
16500 IX86_BUILTIN_PSRLQI,
16502 IX86_BUILTIN_PUNPCKHBW,
16503 IX86_BUILTIN_PUNPCKHWD,
16504 IX86_BUILTIN_PUNPCKHDQ,
16505 IX86_BUILTIN_PUNPCKLBW,
16506 IX86_BUILTIN_PUNPCKLWD,
16507 IX86_BUILTIN_PUNPCKLDQ,
16509 IX86_BUILTIN_SHUFPS,
16511 IX86_BUILTIN_RCPPS,
16512 IX86_BUILTIN_RCPSS,
16513 IX86_BUILTIN_RSQRTPS,
16514 IX86_BUILTIN_RSQRTSS,
16515 IX86_BUILTIN_RSQRTF,
16516 IX86_BUILTIN_SQRTPS,
16517 IX86_BUILTIN_SQRTSS,
16519 IX86_BUILTIN_UNPCKHPS,
16520 IX86_BUILTIN_UNPCKLPS,
16522 IX86_BUILTIN_ANDPS,
16523 IX86_BUILTIN_ANDNPS,
16525 IX86_BUILTIN_XORPS,
16528 IX86_BUILTIN_LDMXCSR,
16529 IX86_BUILTIN_STMXCSR,
16530 IX86_BUILTIN_SFENCE,
16532 /* 3DNow! Original */
16533 IX86_BUILTIN_FEMMS,
16534 IX86_BUILTIN_PAVGUSB,
16535 IX86_BUILTIN_PF2ID,
16536 IX86_BUILTIN_PFACC,
16537 IX86_BUILTIN_PFADD,
16538 IX86_BUILTIN_PFCMPEQ,
16539 IX86_BUILTIN_PFCMPGE,
16540 IX86_BUILTIN_PFCMPGT,
16541 IX86_BUILTIN_PFMAX,
16542 IX86_BUILTIN_PFMIN,
16543 IX86_BUILTIN_PFMUL,
16544 IX86_BUILTIN_PFRCP,
16545 IX86_BUILTIN_PFRCPIT1,
16546 IX86_BUILTIN_PFRCPIT2,
16547 IX86_BUILTIN_PFRSQIT1,
16548 IX86_BUILTIN_PFRSQRT,
16549 IX86_BUILTIN_PFSUB,
16550 IX86_BUILTIN_PFSUBR,
16551 IX86_BUILTIN_PI2FD,
16552 IX86_BUILTIN_PMULHRW,
16554 /* 3DNow! Athlon Extensions */
16555 IX86_BUILTIN_PF2IW,
16556 IX86_BUILTIN_PFNACC,
16557 IX86_BUILTIN_PFPNACC,
16558 IX86_BUILTIN_PI2FW,
16559 IX86_BUILTIN_PSWAPDSI,
16560 IX86_BUILTIN_PSWAPDSF,
/* SSE2 builtins.  */
16563 IX86_BUILTIN_ADDPD,
16564 IX86_BUILTIN_ADDSD,
16565 IX86_BUILTIN_DIVPD,
16566 IX86_BUILTIN_DIVSD,
16567 IX86_BUILTIN_MULPD,
16568 IX86_BUILTIN_MULSD,
16569 IX86_BUILTIN_SUBPD,
16570 IX86_BUILTIN_SUBSD,
16572 IX86_BUILTIN_CMPEQPD,
16573 IX86_BUILTIN_CMPLTPD,
16574 IX86_BUILTIN_CMPLEPD,
16575 IX86_BUILTIN_CMPGTPD,
16576 IX86_BUILTIN_CMPGEPD,
16577 IX86_BUILTIN_CMPNEQPD,
16578 IX86_BUILTIN_CMPNLTPD,
16579 IX86_BUILTIN_CMPNLEPD,
16580 IX86_BUILTIN_CMPNGTPD,
16581 IX86_BUILTIN_CMPNGEPD,
16582 IX86_BUILTIN_CMPORDPD,
16583 IX86_BUILTIN_CMPUNORDPD,
16584 IX86_BUILTIN_CMPEQSD,
16585 IX86_BUILTIN_CMPLTSD,
16586 IX86_BUILTIN_CMPLESD,
16587 IX86_BUILTIN_CMPNEQSD,
16588 IX86_BUILTIN_CMPNLTSD,
16589 IX86_BUILTIN_CMPNLESD,
16590 IX86_BUILTIN_CMPORDSD,
16591 IX86_BUILTIN_CMPUNORDSD,
16593 IX86_BUILTIN_COMIEQSD,
16594 IX86_BUILTIN_COMILTSD,
16595 IX86_BUILTIN_COMILESD,
16596 IX86_BUILTIN_COMIGTSD,
16597 IX86_BUILTIN_COMIGESD,
16598 IX86_BUILTIN_COMINEQSD,
16599 IX86_BUILTIN_UCOMIEQSD,
16600 IX86_BUILTIN_UCOMILTSD,
16601 IX86_BUILTIN_UCOMILESD,
16602 IX86_BUILTIN_UCOMIGTSD,
16603 IX86_BUILTIN_UCOMIGESD,
16604 IX86_BUILTIN_UCOMINEQSD,
16606 IX86_BUILTIN_MAXPD,
16607 IX86_BUILTIN_MAXSD,
16608 IX86_BUILTIN_MINPD,
16609 IX86_BUILTIN_MINSD,
16611 IX86_BUILTIN_ANDPD,
16612 IX86_BUILTIN_ANDNPD,
16614 IX86_BUILTIN_XORPD,
16616 IX86_BUILTIN_SQRTPD,
16617 IX86_BUILTIN_SQRTSD,
16619 IX86_BUILTIN_UNPCKHPD,
16620 IX86_BUILTIN_UNPCKLPD,
16622 IX86_BUILTIN_SHUFPD,
16624 IX86_BUILTIN_LOADUPD,
16625 IX86_BUILTIN_STOREUPD,
16626 IX86_BUILTIN_MOVSD,
16628 IX86_BUILTIN_LOADHPD,
16629 IX86_BUILTIN_LOADLPD,
16631 IX86_BUILTIN_CVTDQ2PD,
16632 IX86_BUILTIN_CVTDQ2PS,
16634 IX86_BUILTIN_CVTPD2DQ,
16635 IX86_BUILTIN_CVTPD2PI,
16636 IX86_BUILTIN_CVTPD2PS,
16637 IX86_BUILTIN_CVTTPD2DQ,
16638 IX86_BUILTIN_CVTTPD2PI,
16640 IX86_BUILTIN_CVTPI2PD,
16641 IX86_BUILTIN_CVTSI2SD,
16642 IX86_BUILTIN_CVTSI642SD,
16644 IX86_BUILTIN_CVTSD2SI,
16645 IX86_BUILTIN_CVTSD2SI64,
16646 IX86_BUILTIN_CVTSD2SS,
16647 IX86_BUILTIN_CVTSS2SD,
16648 IX86_BUILTIN_CVTTSD2SI,
16649 IX86_BUILTIN_CVTTSD2SI64,
16651 IX86_BUILTIN_CVTPS2DQ,
16652 IX86_BUILTIN_CVTPS2PD,
16653 IX86_BUILTIN_CVTTPS2DQ,
16655 IX86_BUILTIN_MOVNTI,
16656 IX86_BUILTIN_MOVNTPD,
16657 IX86_BUILTIN_MOVNTDQ,
16660 IX86_BUILTIN_MASKMOVDQU,
16661 IX86_BUILTIN_MOVMSKPD,
16662 IX86_BUILTIN_PMOVMSKB128,
16664 IX86_BUILTIN_PACKSSWB128,
16665 IX86_BUILTIN_PACKSSDW128,
16666 IX86_BUILTIN_PACKUSWB128,
16668 IX86_BUILTIN_PADDB128,
16669 IX86_BUILTIN_PADDW128,
16670 IX86_BUILTIN_PADDD128,
16671 IX86_BUILTIN_PADDQ128,
16672 IX86_BUILTIN_PADDSB128,
16673 IX86_BUILTIN_PADDSW128,
16674 IX86_BUILTIN_PADDUSB128,
16675 IX86_BUILTIN_PADDUSW128,
16676 IX86_BUILTIN_PSUBB128,
16677 IX86_BUILTIN_PSUBW128,
16678 IX86_BUILTIN_PSUBD128,
16679 IX86_BUILTIN_PSUBQ128,
16680 IX86_BUILTIN_PSUBSB128,
16681 IX86_BUILTIN_PSUBSW128,
16682 IX86_BUILTIN_PSUBUSB128,
16683 IX86_BUILTIN_PSUBUSW128,
16685 IX86_BUILTIN_PAND128,
16686 IX86_BUILTIN_PANDN128,
16687 IX86_BUILTIN_POR128,
16688 IX86_BUILTIN_PXOR128,
16690 IX86_BUILTIN_PAVGB128,
16691 IX86_BUILTIN_PAVGW128,
16693 IX86_BUILTIN_PCMPEQB128,
16694 IX86_BUILTIN_PCMPEQW128,
16695 IX86_BUILTIN_PCMPEQD128,
16696 IX86_BUILTIN_PCMPGTB128,
16697 IX86_BUILTIN_PCMPGTW128,
16698 IX86_BUILTIN_PCMPGTD128,
16700 IX86_BUILTIN_PMADDWD128,
16702 IX86_BUILTIN_PMAXSW128,
16703 IX86_BUILTIN_PMAXUB128,
16704 IX86_BUILTIN_PMINSW128,
16705 IX86_BUILTIN_PMINUB128,
16707 IX86_BUILTIN_PMULUDQ,
16708 IX86_BUILTIN_PMULUDQ128,
16709 IX86_BUILTIN_PMULHUW128,
16710 IX86_BUILTIN_PMULHW128,
16711 IX86_BUILTIN_PMULLW128,
16713 IX86_BUILTIN_PSADBW128,
16714 IX86_BUILTIN_PSHUFHW,
16715 IX86_BUILTIN_PSHUFLW,
16716 IX86_BUILTIN_PSHUFD,
16718 IX86_BUILTIN_PSLLDQI128,
16719 IX86_BUILTIN_PSLLWI128,
16720 IX86_BUILTIN_PSLLDI128,
16721 IX86_BUILTIN_PSLLQI128,
16722 IX86_BUILTIN_PSRAWI128,
16723 IX86_BUILTIN_PSRADI128,
16724 IX86_BUILTIN_PSRLDQI128,
16725 IX86_BUILTIN_PSRLWI128,
16726 IX86_BUILTIN_PSRLDI128,
16727 IX86_BUILTIN_PSRLQI128,
16729 IX86_BUILTIN_PSLLDQ128,
16730 IX86_BUILTIN_PSLLW128,
16731 IX86_BUILTIN_PSLLD128,
16732 IX86_BUILTIN_PSLLQ128,
16733 IX86_BUILTIN_PSRAW128,
16734 IX86_BUILTIN_PSRAD128,
16735 IX86_BUILTIN_PSRLW128,
16736 IX86_BUILTIN_PSRLD128,
16737 IX86_BUILTIN_PSRLQ128,
16739 IX86_BUILTIN_PUNPCKHBW128,
16740 IX86_BUILTIN_PUNPCKHWD128,
16741 IX86_BUILTIN_PUNPCKHDQ128,
16742 IX86_BUILTIN_PUNPCKHQDQ128,
16743 IX86_BUILTIN_PUNPCKLBW128,
16744 IX86_BUILTIN_PUNPCKLWD128,
16745 IX86_BUILTIN_PUNPCKLDQ128,
16746 IX86_BUILTIN_PUNPCKLQDQ128,
16748 IX86_BUILTIN_CLFLUSH,
16749 IX86_BUILTIN_MFENCE,
16750 IX86_BUILTIN_LFENCE,
16752 /* Prescott New Instructions. */
16753 IX86_BUILTIN_ADDSUBPS,
16754 IX86_BUILTIN_HADDPS,
16755 IX86_BUILTIN_HSUBPS,
16756 IX86_BUILTIN_MOVSHDUP,
16757 IX86_BUILTIN_MOVSLDUP,
16758 IX86_BUILTIN_ADDSUBPD,
16759 IX86_BUILTIN_HADDPD,
16760 IX86_BUILTIN_HSUBPD,
16761 IX86_BUILTIN_LDDQU,
16763 IX86_BUILTIN_MONITOR,
16764 IX86_BUILTIN_MWAIT,
/* SSSE3 builtins.  */
16767 IX86_BUILTIN_PHADDW,
16768 IX86_BUILTIN_PHADDD,
16769 IX86_BUILTIN_PHADDSW,
16770 IX86_BUILTIN_PHSUBW,
16771 IX86_BUILTIN_PHSUBD,
16772 IX86_BUILTIN_PHSUBSW,
16773 IX86_BUILTIN_PMADDUBSW,
16774 IX86_BUILTIN_PMULHRSW,
16775 IX86_BUILTIN_PSHUFB,
16776 IX86_BUILTIN_PSIGNB,
16777 IX86_BUILTIN_PSIGNW,
16778 IX86_BUILTIN_PSIGND,
16779 IX86_BUILTIN_PALIGNR,
16780 IX86_BUILTIN_PABSB,
16781 IX86_BUILTIN_PABSW,
16782 IX86_BUILTIN_PABSD,
16784 IX86_BUILTIN_PHADDW128,
16785 IX86_BUILTIN_PHADDD128,
16786 IX86_BUILTIN_PHADDSW128,
16787 IX86_BUILTIN_PHSUBW128,
16788 IX86_BUILTIN_PHSUBD128,
16789 IX86_BUILTIN_PHSUBSW128,
16790 IX86_BUILTIN_PMADDUBSW128,
16791 IX86_BUILTIN_PMULHRSW128,
16792 IX86_BUILTIN_PSHUFB128,
16793 IX86_BUILTIN_PSIGNB128,
16794 IX86_BUILTIN_PSIGNW128,
16795 IX86_BUILTIN_PSIGND128,
16796 IX86_BUILTIN_PALIGNR128,
16797 IX86_BUILTIN_PABSB128,
16798 IX86_BUILTIN_PABSW128,
16799 IX86_BUILTIN_PABSD128,
16801 /* AMDFAM10 - SSE4A New Instructions. */
16802 IX86_BUILTIN_MOVNTSD,
16803 IX86_BUILTIN_MOVNTSS,
16804 IX86_BUILTIN_EXTRQI,
16805 IX86_BUILTIN_EXTRQ,
16806 IX86_BUILTIN_INSERTQI,
16807 IX86_BUILTIN_INSERTQ,
/* SSE4.1 builtins.  */
16810 IX86_BUILTIN_BLENDPD,
16811 IX86_BUILTIN_BLENDPS,
16812 IX86_BUILTIN_BLENDVPD,
16813 IX86_BUILTIN_BLENDVPS,
16814 IX86_BUILTIN_PBLENDVB128,
16815 IX86_BUILTIN_PBLENDW128,
16820 IX86_BUILTIN_INSERTPS128,
16822 IX86_BUILTIN_MOVNTDQA,
16823 IX86_BUILTIN_MPSADBW128,
16824 IX86_BUILTIN_PACKUSDW128,
16825 IX86_BUILTIN_PCMPEQQ,
16826 IX86_BUILTIN_PHMINPOSUW128,
16828 IX86_BUILTIN_PMAXSB128,
16829 IX86_BUILTIN_PMAXSD128,
16830 IX86_BUILTIN_PMAXUD128,
16831 IX86_BUILTIN_PMAXUW128,
16833 IX86_BUILTIN_PMINSB128,
16834 IX86_BUILTIN_PMINSD128,
16835 IX86_BUILTIN_PMINUD128,
16836 IX86_BUILTIN_PMINUW128,
16838 IX86_BUILTIN_PMOVSXBW128,
16839 IX86_BUILTIN_PMOVSXBD128,
16840 IX86_BUILTIN_PMOVSXBQ128,
16841 IX86_BUILTIN_PMOVSXWD128,
16842 IX86_BUILTIN_PMOVSXWQ128,
16843 IX86_BUILTIN_PMOVSXDQ128,
16845 IX86_BUILTIN_PMOVZXBW128,
16846 IX86_BUILTIN_PMOVZXBD128,
16847 IX86_BUILTIN_PMOVZXBQ128,
16848 IX86_BUILTIN_PMOVZXWD128,
16849 IX86_BUILTIN_PMOVZXWQ128,
16850 IX86_BUILTIN_PMOVZXDQ128,
16852 IX86_BUILTIN_PMULDQ128,
16853 IX86_BUILTIN_PMULLD128,
16855 IX86_BUILTIN_ROUNDPD,
16856 IX86_BUILTIN_ROUNDPS,
16857 IX86_BUILTIN_ROUNDSD,
16858 IX86_BUILTIN_ROUNDSS,
16860 IX86_BUILTIN_PTESTZ,
16861 IX86_BUILTIN_PTESTC,
16862 IX86_BUILTIN_PTESTNZC,
/* Vector element init/extract/set builtins.  */
16864 IX86_BUILTIN_VEC_INIT_V2SI,
16865 IX86_BUILTIN_VEC_INIT_V4HI,
16866 IX86_BUILTIN_VEC_INIT_V8QI,
16867 IX86_BUILTIN_VEC_EXT_V2DF,
16868 IX86_BUILTIN_VEC_EXT_V2DI,
16869 IX86_BUILTIN_VEC_EXT_V4SF,
16870 IX86_BUILTIN_VEC_EXT_V4SI,
16871 IX86_BUILTIN_VEC_EXT_V8HI,
16872 IX86_BUILTIN_VEC_EXT_V2SI,
16873 IX86_BUILTIN_VEC_EXT_V4HI,
16874 IX86_BUILTIN_VEC_EXT_V16QI,
16875 IX86_BUILTIN_VEC_SET_V2DI,
16876 IX86_BUILTIN_VEC_SET_V4SF,
16877 IX86_BUILTIN_VEC_SET_V4SI,
16878 IX86_BUILTIN_VEC_SET_V8HI,
16879 IX86_BUILTIN_VEC_SET_V4HI,
16880 IX86_BUILTIN_VEC_SET_V16QI,
16882 IX86_BUILTIN_VEC_PACK_SFIX,
/* SSE4.2 builtins.  */
16885 IX86_BUILTIN_CRC32QI,
16886 IX86_BUILTIN_CRC32HI,
16887 IX86_BUILTIN_CRC32SI,
16888 IX86_BUILTIN_CRC32DI,
16890 IX86_BUILTIN_PCMPESTRI128,
16891 IX86_BUILTIN_PCMPESTRM128,
16892 IX86_BUILTIN_PCMPESTRA128,
16893 IX86_BUILTIN_PCMPESTRC128,
16894 IX86_BUILTIN_PCMPESTRO128,
16895 IX86_BUILTIN_PCMPESTRS128,
16896 IX86_BUILTIN_PCMPESTRZ128,
16897 IX86_BUILTIN_PCMPISTRI128,
16898 IX86_BUILTIN_PCMPISTRM128,
16899 IX86_BUILTIN_PCMPISTRA128,
16900 IX86_BUILTIN_PCMPISTRC128,
16901 IX86_BUILTIN_PCMPISTRO128,
16902 IX86_BUILTIN_PCMPISTRS128,
16903 IX86_BUILTIN_PCMPISTRZ128,
16905 IX86_BUILTIN_PCMPGTQ,
16907 /* TFmode support builtins. */
16909 IX86_BUILTIN_FABSQ,
16910 IX86_BUILTIN_COPYSIGNQ,
16915 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; entries are filled in by def_builtin
   below.  The GTY(()) marker presumably registers the array as a root
   for GCC's garbage collector -- verify against gengtype docs.  */
16916 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16918 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
16919 * if the target_flags include one of MASK. Stores the function decl
16920 * in the ix86_builtins array.
16921 * Returns the function decl or NULL_TREE, if the builtin was not added. */
/* NOTE(review): the `static tree' return-type line, braces, the
   trailing arguments of add_builtin_function and the final
   `return decl;' appear to have been elided from this excerpt.  */
16924 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16926 tree decl = NULL_TREE;
/* Register the builtin only when one of MASK's ISA bits is currently
   enabled and, for builtins tagged OPTION_MASK_ISA_64BIT, only when
   compiling for 64-bit; otherwise decl stays NULL_TREE.  */
16928 if (mask & ix86_isa_flags
16929 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16931 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16933 ix86_builtins[(int) code] = decl;
16939 /* Like def_builtin, but also marks the function decl "const". */
16942 def_builtin_const (int mask, const char *name, tree type,
16943 enum ix86_builtins code)
16945 tree decl = def_builtin (mask, name, type, code);
16947 TREE_READONLY (decl) = 1;
16951 /* Bits for builtin_description.flag. */
16953 /* Set when we don't support the comparison natively, and should
16954 swap_comparison in order to support it. */
/* E.g. the "cmpgtps" builtin is expanded as LT with swapped operands;
   see the bdesc_2arg table below.  */
16955 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* Descriptor used by the bdesc_* tables below to drive mass
   registration and expansion of the ia32 builtins.  */
16957 struct builtin_description
/* NOTE(review): the opening '{', a trailing flag field (holding
   BUILTIN_DESC_SWAP_OPERANDS bits or a CC mode cast to int -- see the
   tables below) and the closing '};' are not visible in this excerpt.  */
16959 const unsigned int mask;	/* OPTION_MASK_ISA_* bits gating availability.  */
16960 const enum insn_code icode;	/* Insn pattern used to expand the builtin.  */
16961 const char *const name;	/* "__builtin_ia32_*" name, or 0.  */
16962 const enum ix86_builtins code;	/* Index into ix86_builtins[].  */
16963 const enum rtx_code comparison;	/* Comparison to emit, or UNKNOWN.  */
/* Scalar FP ordered/unordered compares ([u]comiss / [u]comisd).  The
   rtx_code field records the comparison the expander should test for.
   NOTE(review): the '{' opening and '};' closing the initializer are
   not visible in this excerpt.  */
16967 static const struct builtin_description bdesc_comi[] =
16969 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16970 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16971 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16972 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16973 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16974 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16976 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16977 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16987 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.1 ptest variants; the rtx_code encodes which EFLAGS bit the
   builtin tests (EQ->ZF, LTU->CF, GTU->neither).  */
16995 static const struct builtin_description bdesc_ptest[] =
16998 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16999 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17000 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
/* SSE4.2 explicit-length packed string compares.  The last (flag)
   field reuses its slot to carry the CC mode of the flag bit the
   builtin extracts, cast to int.  */
17003 static const struct builtin_description bdesc_pcmpestr[] =
17006 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17007 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17008 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17009 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17010 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17011 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17012 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 implicit-length (NUL-terminated) packed string compares;
   same CC-mode-in-flag convention as bdesc_pcmpestr above.  */
17015 static const struct builtin_description bdesc_pcmpistr[] =
17018 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17019 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17020 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17021 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17022 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17023 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17024 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 crc32 accumulators.  Entries with name 0 presumably get
   their user-visible names when registered elsewhere -- verify against
   the ix86 builtin-init code.  The QI variant is additionally gated on
   64-bit via OPTION_MASK_ISA_64BIT.  */
17027 static const struct builtin_description bdesc_crc32[] =
17030 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17031 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17032 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17033 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17036 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
/* All SSE4.1; roundsd/roundss carry name 0 and are presumably named
   when registered elsewhere -- verify against the builtin-init code.  */
17037 static const struct builtin_description bdesc_sse_3arg[] =
17040 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17041 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17042 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17043 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17044 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17045 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17046 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17047 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17048 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17049 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17050 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17051 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17054 static const struct builtin_description bdesc_2arg[] =
17057 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17058 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17059 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17060 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17064 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17071 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17074 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17075 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17081 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17089 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17090 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17094 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17096 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17097 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17099 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17109 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17112 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17113 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17126 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17133 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17134 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17143 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17144 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17145 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17146 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17160 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17161 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17162 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17183 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17187 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17188 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17189 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17190 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17218 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17222 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17224 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17225 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17231 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
17234 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17235 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17236 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17237 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17238 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17239 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17240 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17241 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17243 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17244 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17245 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17246 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17247 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17248 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17249 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17250 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17252 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17253 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17255 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17256 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17257 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17258 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17263 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17264 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17265 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17266 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17267 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17270 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17271 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17272 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17280 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17281 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17286 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17288 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17294 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17295 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17296 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17298 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17299 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17300 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17302 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17303 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17308 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17313 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17314 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17315 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17316 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17317 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17318 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17321 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17322 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17323 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17324 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17325 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17326 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17327 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17328 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17329 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17330 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17331 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17332 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17333 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17334 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17335 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17336 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17337 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17338 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17339 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17340 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17341 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17342 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17343 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17344 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17347 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17348 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17349 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17350 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17352 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17358 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17361 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
17364 static const struct builtin_description bdesc_1arg[] =
17366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17369 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17373 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17374 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17375 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17378 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
17380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17398 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17399 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
17406 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17407 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
17410 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17411 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17412 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17413 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17414 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17415 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
17418 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17419 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17420 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17421 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17422 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17423 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17424 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17425 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17426 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17427 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17428 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17429 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17430 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17432 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
17433 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17434 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17437 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17438 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
17441 ix86_init_mmx_sse_builtins (void)
17443 const struct builtin_description * d;
17446 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17447 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17448 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17449 tree V2DI_type_node
17450 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17451 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17452 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17453 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17454 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17455 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17456 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17458 tree pchar_type_node = build_pointer_type (char_type_node);
17459 tree pcchar_type_node = build_pointer_type (
17460 build_type_variant (char_type_node, 1, 0));
17461 tree pfloat_type_node = build_pointer_type (float_type_node);
17462 tree pcfloat_type_node = build_pointer_type (
17463 build_type_variant (float_type_node, 1, 0));
17464 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17465 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17466 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17469 tree int_ftype_v4sf_v4sf
17470 = build_function_type_list (integer_type_node,
17471 V4SF_type_node, V4SF_type_node, NULL_TREE);
17472 tree v4si_ftype_v4sf_v4sf
17473 = build_function_type_list (V4SI_type_node,
17474 V4SF_type_node, V4SF_type_node, NULL_TREE);
17475 /* MMX/SSE/integer conversions. */
17476 tree int_ftype_v4sf
17477 = build_function_type_list (integer_type_node,
17478 V4SF_type_node, NULL_TREE);
17479 tree int64_ftype_v4sf
17480 = build_function_type_list (long_long_integer_type_node,
17481 V4SF_type_node, NULL_TREE);
17482 tree int_ftype_v8qi
17483 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17484 tree v4sf_ftype_v4sf_int
17485 = build_function_type_list (V4SF_type_node,
17486 V4SF_type_node, integer_type_node, NULL_TREE);
17487 tree v4sf_ftype_v4sf_int64
17488 = build_function_type_list (V4SF_type_node,
17489 V4SF_type_node, long_long_integer_type_node,
17491 tree v4sf_ftype_v4sf_v2si
17492 = build_function_type_list (V4SF_type_node,
17493 V4SF_type_node, V2SI_type_node, NULL_TREE);
17495 /* Miscellaneous. */
17496 tree v8qi_ftype_v4hi_v4hi
17497 = build_function_type_list (V8QI_type_node,
17498 V4HI_type_node, V4HI_type_node, NULL_TREE);
17499 tree v4hi_ftype_v2si_v2si
17500 = build_function_type_list (V4HI_type_node,
17501 V2SI_type_node, V2SI_type_node, NULL_TREE);
17502 tree v4sf_ftype_v4sf_v4sf_int
17503 = build_function_type_list (V4SF_type_node,
17504 V4SF_type_node, V4SF_type_node,
17505 integer_type_node, NULL_TREE);
17506 tree v2si_ftype_v4hi_v4hi
17507 = build_function_type_list (V2SI_type_node,
17508 V4HI_type_node, V4HI_type_node, NULL_TREE);
17509 tree v4hi_ftype_v4hi_int
17510 = build_function_type_list (V4HI_type_node,
17511 V4HI_type_node, integer_type_node, NULL_TREE);
17512 tree v4hi_ftype_v4hi_di
17513 = build_function_type_list (V4HI_type_node,
17514 V4HI_type_node, long_long_unsigned_type_node,
17516 tree v2si_ftype_v2si_di
17517 = build_function_type_list (V2SI_type_node,
17518 V2SI_type_node, long_long_unsigned_type_node,
17520 tree void_ftype_void
17521 = build_function_type (void_type_node, void_list_node);
17522 tree void_ftype_unsigned
17523 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17524 tree void_ftype_unsigned_unsigned
17525 = build_function_type_list (void_type_node, unsigned_type_node,
17526 unsigned_type_node, NULL_TREE);
17527 tree void_ftype_pcvoid_unsigned_unsigned
17528 = build_function_type_list (void_type_node, const_ptr_type_node,
17529 unsigned_type_node, unsigned_type_node,
17531 tree unsigned_ftype_void
17532 = build_function_type (unsigned_type_node, void_list_node);
17533 tree v2si_ftype_v4sf
17534 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17535 /* Loads/stores. */
17536 tree void_ftype_v8qi_v8qi_pchar
17537 = build_function_type_list (void_type_node,
17538 V8QI_type_node, V8QI_type_node,
17539 pchar_type_node, NULL_TREE);
17540 tree v4sf_ftype_pcfloat
17541 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17542 /* @@@ the type is bogus */
17543 tree v4sf_ftype_v4sf_pv2si
17544 = build_function_type_list (V4SF_type_node,
17545 V4SF_type_node, pv2si_type_node, NULL_TREE);
17546 tree void_ftype_pv2si_v4sf
17547 = build_function_type_list (void_type_node,
17548 pv2si_type_node, V4SF_type_node, NULL_TREE);
17549 tree void_ftype_pfloat_v4sf
17550 = build_function_type_list (void_type_node,
17551 pfloat_type_node, V4SF_type_node, NULL_TREE);
17552 tree void_ftype_pdi_di
17553 = build_function_type_list (void_type_node,
17554 pdi_type_node, long_long_unsigned_type_node,
17556 tree void_ftype_pv2di_v2di
17557 = build_function_type_list (void_type_node,
17558 pv2di_type_node, V2DI_type_node, NULL_TREE);
17559 /* Normal vector unops. */
17560 tree v4sf_ftype_v4sf
17561 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17562 tree v16qi_ftype_v16qi
17563 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17564 tree v8hi_ftype_v8hi
17565 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17566 tree v4si_ftype_v4si
17567 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17568 tree v8qi_ftype_v8qi
17569 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17570 tree v4hi_ftype_v4hi
17571 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17573 /* Normal vector binops. */
17574 tree v4sf_ftype_v4sf_v4sf
17575 = build_function_type_list (V4SF_type_node,
17576 V4SF_type_node, V4SF_type_node, NULL_TREE);
17577 tree v8qi_ftype_v8qi_v8qi
17578 = build_function_type_list (V8QI_type_node,
17579 V8QI_type_node, V8QI_type_node, NULL_TREE);
17580 tree v4hi_ftype_v4hi_v4hi
17581 = build_function_type_list (V4HI_type_node,
17582 V4HI_type_node, V4HI_type_node, NULL_TREE);
17583 tree v2si_ftype_v2si_v2si
17584 = build_function_type_list (V2SI_type_node,
17585 V2SI_type_node, V2SI_type_node, NULL_TREE);
17586 tree di_ftype_di_di
17587 = build_function_type_list (long_long_unsigned_type_node,
17588 long_long_unsigned_type_node,
17589 long_long_unsigned_type_node, NULL_TREE);
17591 tree di_ftype_di_di_int
17592 = build_function_type_list (long_long_unsigned_type_node,
17593 long_long_unsigned_type_node,
17594 long_long_unsigned_type_node,
17595 integer_type_node, NULL_TREE);
17597 tree v2si_ftype_v2sf
17598 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17599 tree v2sf_ftype_v2si
17600 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17601 tree v2si_ftype_v2si
17602 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17603 tree v2sf_ftype_v2sf
17604 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17605 tree v2sf_ftype_v2sf_v2sf
17606 = build_function_type_list (V2SF_type_node,
17607 V2SF_type_node, V2SF_type_node, NULL_TREE);
17608 tree v2si_ftype_v2sf_v2sf
17609 = build_function_type_list (V2SI_type_node,
17610 V2SF_type_node, V2SF_type_node, NULL_TREE);
17611 tree pint_type_node = build_pointer_type (integer_type_node);
17612 tree pdouble_type_node = build_pointer_type (double_type_node);
17613 tree pcdouble_type_node = build_pointer_type (
17614 build_type_variant (double_type_node, 1, 0));
17615 tree int_ftype_v2df_v2df
17616 = build_function_type_list (integer_type_node,
17617 V2DF_type_node, V2DF_type_node, NULL_TREE);
17619 tree void_ftype_pcvoid
17620 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17621 tree v4sf_ftype_v4si
17622 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17623 tree v4si_ftype_v4sf
17624 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17625 tree v2df_ftype_v4si
17626 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17627 tree v4si_ftype_v2df
17628 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17629 tree v4si_ftype_v2df_v2df
17630 = build_function_type_list (V4SI_type_node,
17631 V2DF_type_node, V2DF_type_node, NULL_TREE);
17632 tree v2si_ftype_v2df
17633 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17634 tree v4sf_ftype_v2df
17635 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17636 tree v2df_ftype_v2si
17637 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17638 tree v2df_ftype_v4sf
17639 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17640 tree int_ftype_v2df
17641 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17642 tree int64_ftype_v2df
17643 = build_function_type_list (long_long_integer_type_node,
17644 V2DF_type_node, NULL_TREE);
17645 tree v2df_ftype_v2df_int
17646 = build_function_type_list (V2DF_type_node,
17647 V2DF_type_node, integer_type_node, NULL_TREE);
17648 tree v2df_ftype_v2df_int64
17649 = build_function_type_list (V2DF_type_node,
17650 V2DF_type_node, long_long_integer_type_node,
17652 tree v4sf_ftype_v4sf_v2df
17653 = build_function_type_list (V4SF_type_node,
17654 V4SF_type_node, V2DF_type_node, NULL_TREE);
17655 tree v2df_ftype_v2df_v4sf
17656 = build_function_type_list (V2DF_type_node,
17657 V2DF_type_node, V4SF_type_node, NULL_TREE);
17658 tree v2df_ftype_v2df_v2df_int
17659 = build_function_type_list (V2DF_type_node,
17660 V2DF_type_node, V2DF_type_node,
17663 tree v2df_ftype_v2df_pcdouble
17664 = build_function_type_list (V2DF_type_node,
17665 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17666 tree void_ftype_pdouble_v2df
17667 = build_function_type_list (void_type_node,
17668 pdouble_type_node, V2DF_type_node, NULL_TREE);
17669 tree void_ftype_pint_int
17670 = build_function_type_list (void_type_node,
17671 pint_type_node, integer_type_node, NULL_TREE);
17672 tree void_ftype_v16qi_v16qi_pchar
17673 = build_function_type_list (void_type_node,
17674 V16QI_type_node, V16QI_type_node,
17675 pchar_type_node, NULL_TREE);
17676 tree v2df_ftype_pcdouble
17677 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17678 tree v2df_ftype_v2df_v2df
17679 = build_function_type_list (V2DF_type_node,
17680 V2DF_type_node, V2DF_type_node, NULL_TREE);
17681 tree v16qi_ftype_v16qi_v16qi
17682 = build_function_type_list (V16QI_type_node,
17683 V16QI_type_node, V16QI_type_node, NULL_TREE);
17684 tree v8hi_ftype_v8hi_v8hi
17685 = build_function_type_list (V8HI_type_node,
17686 V8HI_type_node, V8HI_type_node, NULL_TREE);
17687 tree v4si_ftype_v4si_v4si
17688 = build_function_type_list (V4SI_type_node,
17689 V4SI_type_node, V4SI_type_node, NULL_TREE);
17690 tree v2di_ftype_v2di_v2di
17691 = build_function_type_list (V2DI_type_node,
17692 V2DI_type_node, V2DI_type_node, NULL_TREE);
17693 tree v2di_ftype_v2df_v2df
17694 = build_function_type_list (V2DI_type_node,
17695 V2DF_type_node, V2DF_type_node, NULL_TREE);
17696 tree v2df_ftype_v2df
17697 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17698 tree v2di_ftype_v2di_int
17699 = build_function_type_list (V2DI_type_node,
17700 V2DI_type_node, integer_type_node, NULL_TREE);
17701 tree v2di_ftype_v2di_v2di_int
17702 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17703 V2DI_type_node, integer_type_node, NULL_TREE);
17704 tree v4si_ftype_v4si_int
17705 = build_function_type_list (V4SI_type_node,
17706 V4SI_type_node, integer_type_node, NULL_TREE);
17707 tree v8hi_ftype_v8hi_int
17708 = build_function_type_list (V8HI_type_node,
17709 V8HI_type_node, integer_type_node, NULL_TREE);
17710 tree v4si_ftype_v8hi_v8hi
17711 = build_function_type_list (V4SI_type_node,
17712 V8HI_type_node, V8HI_type_node, NULL_TREE);
17713 tree di_ftype_v8qi_v8qi
17714 = build_function_type_list (long_long_unsigned_type_node,
17715 V8QI_type_node, V8QI_type_node, NULL_TREE);
17716 tree di_ftype_v2si_v2si
17717 = build_function_type_list (long_long_unsigned_type_node,
17718 V2SI_type_node, V2SI_type_node, NULL_TREE);
17719 tree v2di_ftype_v16qi_v16qi
17720 = build_function_type_list (V2DI_type_node,
17721 V16QI_type_node, V16QI_type_node, NULL_TREE);
17722 tree v2di_ftype_v4si_v4si
17723 = build_function_type_list (V2DI_type_node,
17724 V4SI_type_node, V4SI_type_node, NULL_TREE);
17725 tree int_ftype_v16qi
17726 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17727 tree v16qi_ftype_pcchar
17728 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17729 tree void_ftype_pchar_v16qi
17730 = build_function_type_list (void_type_node,
17731 pchar_type_node, V16QI_type_node, NULL_TREE);
17733 tree v2di_ftype_v2di_unsigned_unsigned
17734 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17735 unsigned_type_node, unsigned_type_node,
17737 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17738 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17739 unsigned_type_node, unsigned_type_node,
17741 tree v2di_ftype_v2di_v16qi
17742 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17744 tree v2df_ftype_v2df_v2df_v2df
17745 = build_function_type_list (V2DF_type_node,
17746 V2DF_type_node, V2DF_type_node,
17747 V2DF_type_node, NULL_TREE);
17748 tree v4sf_ftype_v4sf_v4sf_v4sf
17749 = build_function_type_list (V4SF_type_node,
17750 V4SF_type_node, V4SF_type_node,
17751 V4SF_type_node, NULL_TREE);
17752 tree v8hi_ftype_v16qi
17753 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17755 tree v4si_ftype_v16qi
17756 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17758 tree v2di_ftype_v16qi
17759 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17761 tree v4si_ftype_v8hi
17762 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17764 tree v2di_ftype_v8hi
17765 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17767 tree v2di_ftype_v4si
17768 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17770 tree v2di_ftype_pv2di
17771 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17773 tree v16qi_ftype_v16qi_v16qi_int
17774 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17775 V16QI_type_node, integer_type_node,
17777 tree v16qi_ftype_v16qi_v16qi_v16qi
17778 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17779 V16QI_type_node, V16QI_type_node,
17781 tree v8hi_ftype_v8hi_v8hi_int
17782 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17783 V8HI_type_node, integer_type_node,
17785 tree v4si_ftype_v4si_v4si_int
17786 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17787 V4SI_type_node, integer_type_node,
17789 tree int_ftype_v2di_v2di
17790 = build_function_type_list (integer_type_node,
17791 V2DI_type_node, V2DI_type_node,
17793 tree int_ftype_v16qi_int_v16qi_int_int
17794 = build_function_type_list (integer_type_node,
17801 tree v16qi_ftype_v16qi_int_v16qi_int_int
17802 = build_function_type_list (V16QI_type_node,
17809 tree int_ftype_v16qi_v16qi_int
17810 = build_function_type_list (integer_type_node,
17817 /* The __float80 type. */
17818 if (TYPE_MODE (long_double_type_node) == XFmode)
17819 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17823 /* The __float80 type. */
17824 tree float80_type_node = make_node (REAL_TYPE);
17826 TYPE_PRECISION (float80_type_node) = 80;
17827 layout_type (float80_type_node);
17828 (*lang_hooks.types.register_builtin_type) (float80_type_node,
17834 tree float128_type_node = make_node (REAL_TYPE);
17836 TYPE_PRECISION (float128_type_node) = 128;
17837 layout_type (float128_type_node);
17838 (*lang_hooks.types.register_builtin_type) (float128_type_node,
17841 /* TFmode support builtins. */
17842 ftype = build_function_type (float128_type_node,
17844 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
17846 ftype = build_function_type_list (float128_type_node,
17847 float128_type_node,
17849 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
17851 ftype = build_function_type_list (float128_type_node,
17852 float128_type_node,
17853 float128_type_node,
17855 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
17858 /* Add all SSE builtins that are more or less simple operations on
17860 for (i = 0, d = bdesc_sse_3arg;
17861 i < ARRAY_SIZE (bdesc_sse_3arg);
17864 /* Use one of the operands; the target can have a different mode for
17865 mask-generating compares. */
17866 enum machine_mode mode;
17871 mode = insn_data[d->icode].operand[1].mode;
17876 type = v16qi_ftype_v16qi_v16qi_int;
17879 type = v8hi_ftype_v8hi_v8hi_int;
17882 type = v4si_ftype_v4si_v4si_int;
17885 type = v2di_ftype_v2di_v2di_int;
17888 type = v2df_ftype_v2df_v2df_int;
17891 type = v4sf_ftype_v4sf_v4sf_int;
17894 gcc_unreachable ();
17897 /* Override for variable blends. */
17900 case CODE_FOR_sse4_1_blendvpd:
17901 type = v2df_ftype_v2df_v2df_v2df;
17903 case CODE_FOR_sse4_1_blendvps:
17904 type = v4sf_ftype_v4sf_v4sf_v4sf;
17906 case CODE_FOR_sse4_1_pblendvb:
17907 type = v16qi_ftype_v16qi_v16qi_v16qi;
17913 def_builtin_const (d->mask, d->name, type, d->code);
17916 /* Add all builtins that are more or less simple operations on two
17918 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17920 /* Use one of the operands; the target can have a different mode for
17921 mask-generating compares. */
17922 enum machine_mode mode;
17927 mode = insn_data[d->icode].operand[1].mode;
17932 type = v16qi_ftype_v16qi_v16qi;
17935 type = v8hi_ftype_v8hi_v8hi;
17938 type = v4si_ftype_v4si_v4si;
17941 type = v2di_ftype_v2di_v2di;
17944 type = v2df_ftype_v2df_v2df;
17947 type = v4sf_ftype_v4sf_v4sf;
17950 type = v8qi_ftype_v8qi_v8qi;
17953 type = v4hi_ftype_v4hi_v4hi;
17956 type = v2si_ftype_v2si_v2si;
17959 type = di_ftype_di_di;
17963 gcc_unreachable ();
17966 /* Override for comparisons. */
17967 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17968 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17969 type = v4si_ftype_v4sf_v4sf;
17971 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17972 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17973 type = v2di_ftype_v2df_v2df;
17975 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
17976 type = v4si_ftype_v2df_v2df;
17978 def_builtin_const (d->mask, d->name, type, d->code);
17981 /* Add all builtins that are more or less simple operations on 1 operand. */
17982 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17984 enum machine_mode mode;
17989 mode = insn_data[d->icode].operand[1].mode;
17994 type = v16qi_ftype_v16qi;
17997 type = v8hi_ftype_v8hi;
18000 type = v4si_ftype_v4si;
18003 type = v2df_ftype_v2df;
18006 type = v4sf_ftype_v4sf;
18009 type = v8qi_ftype_v8qi;
18012 type = v4hi_ftype_v4hi;
18015 type = v2si_ftype_v2si;
18022 def_builtin_const (d->mask, d->name, type, d->code);
18025 /* pcmpestr[im] insns. */
18026 for (i = 0, d = bdesc_pcmpestr;
18027 i < ARRAY_SIZE (bdesc_pcmpestr);
18030 if (d->code == IX86_BUILTIN_PCMPESTRM128)
18031 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
18033 ftype = int_ftype_v16qi_int_v16qi_int_int;
18034 def_builtin_const (d->mask, d->name, ftype, d->code);
18037 /* pcmpistr[im] insns. */
18038 for (i = 0, d = bdesc_pcmpistr;
18039 i < ARRAY_SIZE (bdesc_pcmpistr);
18042 if (d->code == IX86_BUILTIN_PCMPISTRM128)
18043 ftype = v16qi_ftype_v16qi_v16qi_int;
18045 ftype = int_ftype_v16qi_v16qi_int;
18046 def_builtin_const (d->mask, d->name, ftype, d->code);
18049 /* Add the remaining MMX insns with somewhat more complicated types. */
18050 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
18051 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
18052 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
18053 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
18055 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
18056 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
18057 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
18059 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
18060 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
18062 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
18063 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
18065 /* comi/ucomi insns. */
18066 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18067 if (d->mask == OPTION_MASK_ISA_SSE2)
18068 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
18070 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
18073 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
18074 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
18076 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
18077 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
18078 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
18080 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
18081 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
18082 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
18083 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
18084 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
18085 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
18086 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
18087 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
18088 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
18089 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
18090 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
18092 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
18094 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
18095 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
18097 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
18098 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
18099 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
18100 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
18102 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
18103 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
18104 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
18105 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
18107 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
18109 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
18111 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
18112 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
18113 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
18114 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
18115 ftype = build_function_type_list (float_type_node,
18118 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
18119 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
18120 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
18122 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
18124 /* Original 3DNow! */
18125 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
18126 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
18127 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
18128 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
18129 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18130 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18131 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18132 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18133 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18134 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18135 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18136 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18137 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18138 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18139 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18140 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18141 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18142 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18143 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18144 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
18146 /* 3DNow! extension as used in the Athlon CPU. */
18147 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18148 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18149 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18150 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18151 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18152 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18155 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18157 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18158 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18160 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18161 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18163 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18164 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18165 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18166 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18167 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18169 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18170 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18171 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18172 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18174 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18175 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18177 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18179 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18180 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18182 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18183 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18184 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18185 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18186 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18188 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18190 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18191 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18192 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18193 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18195 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18196 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18197 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18199 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18200 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18201 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18202 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18204 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18205 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18206 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
18208 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18209 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18211 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18212 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18214 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18215 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18216 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18217 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18218 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18219 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18220 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18222 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18223 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18224 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18225 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18226 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18227 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18228 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18230 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18231 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18232 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18233 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18235 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18237 /* Prescott New Instructions. */
18238 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18239 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18240 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18243 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18244 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18247 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18248 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18249 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18250 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18251 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18252 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18253 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18254 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18255 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18256 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18257 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18258 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18259 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18260 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18261 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18262 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18263 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18264 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
18267 ftype = build_function_type_list (unsigned_type_node,
18268 unsigned_type_node,
18269 unsigned_char_type_node,
18271 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18272 ftype = build_function_type_list (unsigned_type_node,
18273 unsigned_type_node,
18274 short_unsigned_type_node,
18276 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18277 ftype = build_function_type_list (unsigned_type_node,
18278 unsigned_type_node,
18279 unsigned_type_node,
18281 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18282 ftype = build_function_type_list (long_long_unsigned_type_node,
18283 long_long_unsigned_type_node,
18284 long_long_unsigned_type_node,
18286 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
18288 /* AMDFAM10 SSE4A New built-ins */
18289 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18290 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18291 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18292 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18293 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18294 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
18296 /* Access to the vec_init patterns. */
18297 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18298 integer_type_node, NULL_TREE);
18299 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18301 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18302 short_integer_type_node,
18303 short_integer_type_node,
18304 short_integer_type_node, NULL_TREE);
18305 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18307 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18308 char_type_node, char_type_node,
18309 char_type_node, char_type_node,
18310 char_type_node, char_type_node,
18311 char_type_node, NULL_TREE);
18312 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
18314 /* Access to the vec_extract patterns. */
18315 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18316 integer_type_node, NULL_TREE);
18317 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18319 ftype = build_function_type_list (long_long_integer_type_node,
18320 V2DI_type_node, integer_type_node,
18322 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18324 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18325 integer_type_node, NULL_TREE);
18326 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18328 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18329 integer_type_node, NULL_TREE);
18330 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18332 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18333 integer_type_node, NULL_TREE);
18334 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18336 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18337 integer_type_node, NULL_TREE);
18338 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18340 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18341 integer_type_node, NULL_TREE);
18342 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18344 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18345 integer_type_node, NULL_TREE);
18346 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
18348 /* Access to the vec_set patterns. */
18349 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18351 integer_type_node, NULL_TREE);
18352 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18354 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18356 integer_type_node, NULL_TREE);
18357 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18359 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18361 integer_type_node, NULL_TREE);
18362 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18364 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18366 integer_type_node, NULL_TREE);
18367 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18369 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18371 integer_type_node, NULL_TREE);
18372 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18374 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18376 integer_type_node, NULL_TREE);
18377 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
/* Register all ix86 target-specific builtins.  Only the call into the
   MMX/SSE builtin registration is visible in this excerpt; the function's
   return type, braces and any other statements are not sampled here.  */
18381 ix86_init_builtins (void)
18384 ix86_init_mmx_sse_builtins ();
18387 /* Errors in the source file can cause expand_expr to return const0_rtx
18388 where we expect a vector. To avoid crashing, use one of the vector
18389 clear instructions. */
18391 safe_vector_operand (rtx x, enum machine_mode mode)
/* Map the scalar zero rtx onto the all-zeros vector constant of MODE.
   Any other operand passes through unchanged (the return statement is
   outside the sampled lines).  */
18393 if (x == const0_rtx)
18394 x = CONST0_RTX (mode);
18398 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18399 4 operands. The third argument must be a constant smaller than 8
18403 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
/* Expand the three call arguments to rtx and fetch each operand's
   required machine mode from the insn description.  */
18407 tree arg0 = CALL_EXPR_ARG (exp, 0);
18408 tree arg1 = CALL_EXPR_ARG (exp, 1);
18409 tree arg2 = CALL_EXPR_ARG (exp, 2);
18410 rtx op0 = expand_normal (arg0);
18411 rtx op1 = expand_normal (arg1);
18412 rtx op2 = expand_normal (arg2);
18413 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18414 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18415 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18416 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
/* Guard against const0_rtx standing in for a vector operand.  */
18418 if (VECTOR_MODE_P (mode1))
18419 op0 = safe_vector_operand (op0, mode1);
18420 if (VECTOR_MODE_P (mode2))
18421 op1 = safe_vector_operand (op1, mode2);
18422 if (VECTOR_MODE_P (mode3))
18423 op2 = safe_vector_operand (op2, mode3);
/* Allocate a fresh result register if the caller's target is missing,
   has the wrong mode, or fails the insn's output predicate.  */
18427 || GET_MODE (target) != tmode
18428 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18429 target = gen_reg_rtx (tmode);
/* Force the first two operands into registers when the insn
   predicates reject their current form.  */
18431 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18432 op0 = copy_to_mode_reg (mode1, op0);
18433 if ((optimize && !register_operand (op1, mode2))
18434 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18435 op1 = copy_to_mode_reg (mode2, op1);
/* The third operand is special-cased per insn: the blendv forms take
   it in a register, the scalar round forms demand a 4-bit immediate,
   and everything else demands an 8-bit immediate.  (The switch header
   and break/default lines are outside the sampled lines.)  */
18437 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18440 case CODE_FOR_sse4_1_blendvpd:
18441 case CODE_FOR_sse4_1_blendvps:
18442 case CODE_FOR_sse4_1_pblendvb:
18443 op2 = copy_to_mode_reg (mode3, op2);
18446 case CODE_FOR_sse4_1_roundsd:
18447 case CODE_FOR_sse4_1_roundss:
18448 error ("the third argument must be a 4-bit immediate");
18452 error ("the third argument must be an 8-bit immediate");
18456 pat = GEN_FCN (icode) (target, op0, op1, op2);
18463 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18466 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18469 tree arg0 = CALL_EXPR_ARG (exp, 0);
18470 tree arg1 = CALL_EXPR_ARG (exp, 1);
18471 rtx op0 = expand_normal (arg0);
18472 rtx op1 = expand_normal (arg1);
18473 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18474 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18475 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Allocate a fresh result register when the caller's target cannot be
   used directly (condition's first clause is outside the sampled lines).  */
18479 || GET_MODE (target) != tmode
18480 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18481 target = gen_reg_rtx (tmode);
18483 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18484 op0 = copy_to_mode_reg (mode0, op0);
/* If the data operand does not satisfy its predicate, copy it to a
   register and take a lowpart subreg in the mode the insn expects
   (crc32 variants read a narrower chunk than the expanded rtx).  */
18485 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18487 op1 = copy_to_reg (op1);
18488 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18491 pat = GEN_FCN (icode) (target, op0, op1);
18498 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18501 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18504 tree arg0 = CALL_EXPR_ARG (exp, 0);
18505 tree arg1 = CALL_EXPR_ARG (exp, 1);
18506 rtx op0 = expand_normal (arg0);
18507 rtx op1 = expand_normal (arg1);
18508 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18509 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18510 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector operand.  */
18512 if (VECTOR_MODE_P (mode0))
18513 op0 = safe_vector_operand (op0, mode0);
18514 if (VECTOR_MODE_P (mode1))
18515 op1 = safe_vector_operand (op1, mode1);
18517 if (optimize || !target
18518 || GET_MODE (target) != tmode
18519 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18520 target = gen_reg_rtx (tmode);
/* Special case: an SImode second operand feeding a TImode insn is
   widened by loading it into a V4SI register and viewing the lowpart
   as TImode.  */
18522 if (GET_MODE (op1) == SImode && mode1 == TImode)
18524 rtx x = gen_reg_rtx (V4SImode);
18525 emit_insn (gen_sse2_loadd (x, op1));
18526 op1 = gen_lowpart (TImode, x);
18529 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18530 op0 = copy_to_mode_reg (mode0, op0);
18531 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18532 op1 = copy_to_mode_reg (mode1, op1);
18534 /* ??? Using ix86_fixup_binary_operands is problematic when
18535 we've got mismatched modes. Fake it. */
/* When all modes agree, let the generic binary-operand fixup massage
   target/op0/op1; otherwise force both operands into registers and
   retry with a fresh target (surrounding braces/assignments are
   outside the sampled lines).  */
18541 if (tmode == mode0 && tmode == mode1)
18543 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18547 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18549 op0 = force_reg (mode0, op0);
18550 op1 = force_reg (mode1, op1);
18551 target = gen_reg_rtx (tmode);
18554 pat = GEN_FCN (icode) (target, op0, op1);
18561 /* Subroutine of ix86_expand_builtin to take care of stores. */
18564 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18567 tree arg0 = CALL_EXPR_ARG (exp, 0);
18568 tree arg1 = CALL_EXPR_ARG (exp, 1);
18569 rtx op0 = expand_normal (arg0);
18570 rtx op1 = expand_normal (arg1);
18571 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18572 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18574 if (VECTOR_MODE_P (mode1))
18575 op1 = safe_vector_operand (op1, mode1);
/* arg0 is the destination pointer: force it into a register and wrap
   it in a MEM of the store's destination mode.  The value operand is
   always copied into a register.  */
18577 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18578 op1 = copy_to_mode_reg (mode1, op1);
18580 pat = GEN_FCN (icode) (op0, op1);
18586 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18589 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18590 rtx target, int do_load)
18593 tree arg0 = CALL_EXPR_ARG (exp, 0);
18594 rtx op0 = expand_normal (arg0);
18595 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18596 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18598 if (optimize || !target
18599 || GET_MODE (target) != tmode
18600 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18601 target = gen_reg_rtx (tmode);
/* When DO_LOAD is set, arg0 is a pointer: dereference it as a MEM in
   the operand's mode (the if (do_load) line is outside the sampled
   lines); otherwise treat arg0 as the value itself.  */
18603 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18606 if (VECTOR_MODE_P (mode0))
18607 op0 = safe_vector_operand (op0, mode0);
18609 if ((optimize && !register_operand (op0, mode0))
18610 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18611 op0 = copy_to_mode_reg (mode0, op0);
/* The packed round insns are "unops" that carry an extra rounding-mode
   immediate: fetch a second argument and require a 4-bit immediate.
   (Switch header/default lines are outside the sampled lines.)  */
18616 case CODE_FOR_sse4_1_roundpd:
18617 case CODE_FOR_sse4_1_roundps:
18619 tree arg1 = CALL_EXPR_ARG (exp, 1);
18620 rtx op1 = expand_normal (arg1);
18621 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18623 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18625 error ("the second argument must be a 4-bit immediate");
18628 pat = GEN_FCN (icode) (target, op0, op1);
18632 pat = GEN_FCN (icode) (target, op0);
18642 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18643 sqrtss, rsqrtss, rcpss. */
18646 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18649 tree arg0 = CALL_EXPR_ARG (exp, 0);
18650 rtx op1, op0 = expand_normal (arg0);
18651 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18652 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18654 if (optimize || !target
18655 || GET_MODE (target) != tmode
18656 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18657 target = gen_reg_rtx (tmode);
18659 if (VECTOR_MODE_P (mode0))
18660 op0 = safe_vector_operand (op0, mode0);
18662 if ((optimize && !register_operand (op0, mode0))
18663 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18664 op0 = copy_to_mode_reg (mode0, op0);
/* NOTE(review): op1 appears here without a visible assignment — the
   line initializing it (presumably op1 = op0, duplicating the single
   source for the vm* pattern's two inputs) is outside the sampled
   lines; confirm against the full file.  */
18667 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18668 op1 = copy_to_mode_reg (mode0, op1);
18670 pat = GEN_FCN (icode) (target, op0, op1);
18677 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18680 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18684 tree arg0 = CALL_EXPR_ARG (exp, 0);
18685 tree arg1 = CALL_EXPR_ARG (exp, 1);
18686 rtx op0 = expand_normal (arg0);
18687 rtx op1 = expand_normal (arg1);
18689 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18690 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18691 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18692 enum rtx_code comparison = d->comparison;
18694 if (VECTOR_MODE_P (mode0))
18695 op0 = safe_vector_operand (op0, mode0);
18696 if (VECTOR_MODE_P (mode1))
18697 op1 = safe_vector_operand (op1, mode1);
18699 /* Swap operands if we have a comparison that isn't available in
18701 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Copy op1 aside before the swap (the lines completing the op0/op1
   exchange are outside the sampled lines).  */
18703 rtx tmp = gen_reg_rtx (mode1);
18704 emit_move_insn (tmp, op1);
18709 if (optimize || !target
18710 || GET_MODE (target) != tmode
18711 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18712 target = gen_reg_rtx (tmode);
18714 if ((optimize && !register_operand (op0, mode0))
18715 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18716 op0 = copy_to_mode_reg (mode0, op0);
18717 if ((optimize && !register_operand (op1, mode1))
18718 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18719 op1 = copy_to_mode_reg (mode1, op1);
/* Build the comparison rtx and hand all four operands to the insn.  */
18721 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18722 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18729 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18732 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18736 tree arg0 = CALL_EXPR_ARG (exp, 0);
18737 tree arg1 = CALL_EXPR_ARG (exp, 1);
18738 rtx op0 = expand_normal (arg0);
18739 rtx op1 = expand_normal (arg1);
18740 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18741 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18742 enum rtx_code comparison = d->comparison;
18744 if (VECTOR_MODE_P (mode0))
18745 op0 = safe_vector_operand (op0, mode0);
18746 if (VECTOR_MODE_P (mode1))
18747 op1 = safe_vector_operand (op1, mode1);
18749 /* Swap operands if we have a comparison that isn't available in
18751 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* The result is built in a zeroed SImode register whose low QImode
   subreg receives the flag-derived bit below.  */
18758 target = gen_reg_rtx (SImode);
18759 emit_move_insn (target, const0_rtx);
18760 target = gen_rtx_SUBREG (QImode, target, 0);
18762 if ((optimize && !register_operand (op0, mode0))
18763 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18764 op0 = copy_to_mode_reg (mode0, op0);
18765 if ((optimize && !register_operand (op1, mode1))
18766 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18767 op1 = copy_to_mode_reg (mode1, op1);
18769 pat = GEN_FCN (d->icode) (op0, op1);
/* Set only the low byte of the result from the comparison on the
   flags register, then return the containing SImode register.  */
18773 emit_insn (gen_rtx_SET (VOIDmode,
18774 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18775 gen_rtx_fmt_ee (comparison, QImode,
18779 return SUBREG_REG (target);
18782 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18785 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18789 tree arg0 = CALL_EXPR_ARG (exp, 0);
18790 tree arg1 = CALL_EXPR_ARG (exp, 1);
18791 rtx op0 = expand_normal (arg0);
18792 rtx op1 = expand_normal (arg1);
18793 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18794 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18795 enum rtx_code comparison = d->comparison;
18797 if (VECTOR_MODE_P (mode0))
18798 op0 = safe_vector_operand (op0, mode0);
18799 if (VECTOR_MODE_P (mode1))
18800 op1 = safe_vector_operand (op1, mode1);
/* Result register: zeroed SImode whose low byte gets the flag bit.  */
18802 target = gen_reg_rtx (SImode);
18803 emit_move_insn (target, const0_rtx);
18804 target = gen_rtx_SUBREG (QImode, target, 0);
18806 if ((optimize && !register_operand (op0, mode0))
18807 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18808 op0 = copy_to_mode_reg (mode0, op0);
18809 if ((optimize && !register_operand (op1, mode1))
18810 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18811 op1 = copy_to_mode_reg (mode1, op1);
18813 pat = GEN_FCN (d->icode) (op0, op1);
/* Materialize the flags comparison into the low byte of TARGET and
   return the enclosing SImode register.  */
18817 emit_insn (gen_rtx_SET (VOIDmode,
18818 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18819 gen_rtx_fmt_ee (comparison, QImode,
18823 return SUBREG_REG (target);
18826 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
18829 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
18830 tree exp, rtx target)
18833 tree arg0 = CALL_EXPR_ARG (exp, 0);
18834 tree arg1 = CALL_EXPR_ARG (exp, 1);
18835 tree arg2 = CALL_EXPR_ARG (exp, 2);
18836 tree arg3 = CALL_EXPR_ARG (exp, 3);
18837 tree arg4 = CALL_EXPR_ARG (exp, 4);
18838 rtx scratch0, scratch1;
18839 rtx op0 = expand_normal (arg0);
18840 rtx op1 = expand_normal (arg1);
18841 rtx op2 = expand_normal (arg2);
18842 rtx op3 = expand_normal (arg3);
18843 rtx op4 = expand_normal (arg4);
18844 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* The pcmpestr patterns have two outputs (index and mask) plus five
   inputs: vector, length, vector, length, immediate control byte.  */
18846 tmode0 = insn_data[d->icode].operand[0].mode;
18847 tmode1 = insn_data[d->icode].operand[1].mode;
18848 modev2 = insn_data[d->icode].operand[2].mode;
18849 modei3 = insn_data[d->icode].operand[3].mode;
18850 modev4 = insn_data[d->icode].operand[4].mode;
18851 modei5 = insn_data[d->icode].operand[5].mode;
18852 modeimm = insn_data[d->icode].operand[6].mode;
18854 if (VECTOR_MODE_P (modev2))
18855 op0 = safe_vector_operand (op0, modev2);
18856 if (VECTOR_MODE_P (modev4))
18857 op2 = safe_vector_operand (op2, modev4);
18859 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18860 op0 = copy_to_mode_reg (modev2, op0);
18861 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
18862 op1 = copy_to_mode_reg (modei3, op1);
18863 if ((optimize && !register_operand (op2, modev4))
18864 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
18865 op2 = copy_to_mode_reg (modev4, op2);
18866 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
18867 op3 = copy_to_mode_reg (modei5, op3);
/* The control operand must be an 8-bit immediate.  */
18869 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
18871 error ("the fifth argument must be a 8-bit immediate");
/* Which of the two outputs the builtin wants determines where TARGET
   goes; the unused output lands in a scratch register.  */
18875 if (d->code == IX86_BUILTIN_PCMPESTRI128)
18877 if (optimize || !target
18878 || GET_MODE (target) != tmode0
18879 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18880 target = gen_reg_rtx (tmode0);
18882 scratch1 = gen_reg_rtx (tmode1);
18884 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
18886 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
18888 if (optimize || !target
18889 || GET_MODE (target) != tmode1
18890 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18891 target = gen_reg_rtx (tmode1);
18893 scratch0 = gen_reg_rtx (tmode0);
18895 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-reading variants: both outputs are scratch; the result is the
   tested EFLAGS bit (d->flag names the flags register).  */
18899 gcc_assert (d->flag);
18901 scratch0 = gen_reg_rtx (tmode0);
18902 scratch1 = gen_reg_rtx (tmode1);
18904 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
18914 target = gen_reg_rtx (SImode);
18915 emit_move_insn (target, const0_rtx);
18916 target = gen_rtx_SUBREG (QImode, target, 0);
18919 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18920 gen_rtx_fmt_ee (EQ, QImode,
18921 gen_rtx_REG ((enum machine_mode) d->flag,
18924 return SUBREG_REG (target);
18931 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
18934 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
18935 tree exp, rtx target)
18938 tree arg0 = CALL_EXPR_ARG (exp, 0);
18939 tree arg1 = CALL_EXPR_ARG (exp, 1);
18940 tree arg2 = CALL_EXPR_ARG (exp, 2);
18941 rtx scratch0, scratch1;
18942 rtx op0 = expand_normal (arg0);
18943 rtx op1 = expand_normal (arg1);
18944 rtx op2 = expand_normal (arg2);
18945 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Implicit-length variant: two outputs (index, mask) and three inputs
   (vector, vector, immediate control byte).  */
18947 tmode0 = insn_data[d->icode].operand[0].mode;
18948 tmode1 = insn_data[d->icode].operand[1].mode;
18949 modev2 = insn_data[d->icode].operand[2].mode;
18950 modev3 = insn_data[d->icode].operand[3].mode;
18951 modeimm = insn_data[d->icode].operand[4].mode;
18953 if (VECTOR_MODE_P (modev2))
18954 op0 = safe_vector_operand (op0, modev2);
18955 if (VECTOR_MODE_P (modev3))
18956 op1 = safe_vector_operand (op1, modev3);
18958 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18959 op0 = copy_to_mode_reg (modev2, op0);
18960 if ((optimize && !register_operand (op1, modev3))
18961 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
18962 op1 = copy_to_mode_reg (modev3, op1);
/* The control operand must be an 8-bit immediate.  */
18964 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
18966 error ("the third argument must be a 8-bit immediate");
/* Route TARGET to whichever output the builtin wants; the other
   output goes to a scratch register.  */
18970 if (d->code == IX86_BUILTIN_PCMPISTRI128)
18972 if (optimize || !target
18973 || GET_MODE (target) != tmode0
18974 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18975 target = gen_reg_rtx (tmode0);
18977 scratch1 = gen_reg_rtx (tmode1);
18979 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
18981 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
18983 if (optimize || !target
18984 || GET_MODE (target) != tmode1
18985 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18986 target = gen_reg_rtx (tmode1);
18988 scratch0 = gen_reg_rtx (tmode0);
18990 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-reading variants: both outputs are scratch; the result is the
   tested EFLAGS bit (d->flag names the flags register).  */
18994 gcc_assert (d->flag);
18996 scratch0 = gen_reg_rtx (tmode0);
18997 scratch1 = gen_reg_rtx (tmode1);
18999 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
19009 target = gen_reg_rtx (SImode);
19010 emit_move_insn (target, const0_rtx);
19011 target = gen_rtx_SUBREG (QImode, target, 0);
19014 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
19015 gen_rtx_fmt_ee (EQ, QImode,
19016 gen_rtx_REG ((enum machine_mode) d->flag,
19019 return SUBREG_REG (target);
19025 /* Return the integer constant in ARG. Constrain it to be in the range
19026 of the subparts of VEC_TYPE; issue an error if not. */
19029 get_element_number (tree vec_type, tree arg)
19031 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant selectors and constants above the last lane
   index; the error recovery/return lines are outside the sampled
   lines.  */
19033 if (!host_integerp (arg, 1)
19034 || (elt = tree_low_cst (arg, 1), elt > max))
19036 error ("selector must be an integer constant in the range 0..%wi", max);
19043 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19044 ix86_expand_vector_init. We DO have language-level syntax for this, in
19045 the form of (type){ init-list }. Except that since we can't place emms
19046 instructions from inside the compiler, we can't allow the use of MMX
19047 registers unless the user explicitly asks for it. So we do *not* define
19048 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
19049 we have builtins invoked by mmintrin.h that gives us license to emit
19050 these sorts of instructions. */
19053 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
19055 enum machine_mode tmode = TYPE_MODE (type);
19056 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
19057 int i, n_elt = GET_MODE_NUNITS (tmode);
19058 rtvec v = rtvec_alloc (n_elt);
/* One call argument per vector lane.  */
19060 gcc_assert (VECTOR_MODE_P (tmode));
19061 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and view it in the vector's element mode.  */
19063 for (i = 0; i < n_elt; ++i)
19065 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
19066 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
19069 if (!target || !register_operand (target, tmode))
19070 target = gen_reg_rtx (tmode);
19072 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
19076 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19077 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19078 had a language-level syntax for referencing vector elements. */
19081 ix86_expand_vec_ext_builtin (tree exp, rtx target)
19083 enum machine_mode tmode, mode0;
19088 arg0 = CALL_EXPR_ARG (exp, 0);
19089 arg1 = CALL_EXPR_ARG (exp, 1);
/* arg0 is the vector, arg1 the (constant) lane selector.  */
19091 op0 = expand_normal (arg0);
19092 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode is the element mode, mode0 the whole-vector mode.  */
19094 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19095 mode0 = TYPE_MODE (TREE_TYPE (arg0));
19096 gcc_assert (VECTOR_MODE_P (mode0));
19098 op0 = force_reg (mode0, op0);
19100 if (optimize || !target || !register_operand (target, tmode))
19101 target = gen_reg_rtx (tmode);
19103 ix86_expand_vector_extract (true, target, op0, elt);
19108 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19109 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19110 a language-level syntax for referencing vector elements. */
19113 ix86_expand_vec_set_builtin (tree exp)
19115 enum machine_mode tmode, mode1;
19116 tree arg0, arg1, arg2;
19118 rtx op0, op1, target;
/* arg0 = source vector, arg1 = new element value, arg2 = lane index.  */
19120 arg0 = CALL_EXPR_ARG (exp, 0);
19121 arg1 = CALL_EXPR_ARG (exp, 1);
19122 arg2 = CALL_EXPR_ARG (exp, 2);
19124 tmode = TYPE_MODE (TREE_TYPE (arg0));
19125 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19126 gcc_assert (VECTOR_MODE_P (tmode));
19128 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19129 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19130 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the element to the vector's element mode if needed.  */
19132 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19133 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19135 op0 = force_reg (tmode, op0);
19136 op1 = force_reg (mode1, op1);
19138 /* OP0 is the source of these builtin functions and shouldn't be
19139 modified. Create a copy, use it and return it as target. */
19140 target = gen_reg_rtx (tmode);
19141 emit_move_insn (target, op0);
19142 ix86_expand_vector_set (true, target, op1, elt);
19147 /* Expand an expression EXP that calls a built-in function,
19148 with result going to TARGET if that's convenient
19149 (and in mode MODE if that's convenient).
19150 SUBTARGET may be used as the target for computing one of EXP's operands.
19151 IGNORE is nonzero if the value is to be ignored. */
19154 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19155 enum machine_mode mode ATTRIBUTE_UNUSED,
19156 int ignore ATTRIBUTE_UNUSED)
19158 const struct builtin_description *d;
19160 enum insn_code icode;
19161 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19162 tree arg0, arg1, arg2, arg3;
19163 rtx op0, op1, op2, op3, pat;
19164 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19165 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19169 case IX86_BUILTIN_EMMS:
19170 emit_insn (gen_mmx_emms ());
19173 case IX86_BUILTIN_SFENCE:
19174 emit_insn (gen_sse_sfence ());
19177 case IX86_BUILTIN_MASKMOVQ:
19178 case IX86_BUILTIN_MASKMOVDQU:
19179 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19180 ? CODE_FOR_mmx_maskmovq
19181 : CODE_FOR_sse2_maskmovdqu);
19182 /* Note the arg order is different from the operand order. */
19183 arg1 = CALL_EXPR_ARG (exp, 0);
19184 arg2 = CALL_EXPR_ARG (exp, 1);
19185 arg0 = CALL_EXPR_ARG (exp, 2);
19186 op0 = expand_normal (arg0);
19187 op1 = expand_normal (arg1);
19188 op2 = expand_normal (arg2);
19189 mode0 = insn_data[icode].operand[0].mode;
19190 mode1 = insn_data[icode].operand[1].mode;
19191 mode2 = insn_data[icode].operand[2].mode;
19193 op0 = force_reg (Pmode, op0);
19194 op0 = gen_rtx_MEM (mode1, op0);
19196 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19197 op0 = copy_to_mode_reg (mode0, op0);
19198 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19199 op1 = copy_to_mode_reg (mode1, op1);
19200 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19201 op2 = copy_to_mode_reg (mode2, op2);
19202 pat = GEN_FCN (icode) (op0, op1, op2);
19208 case IX86_BUILTIN_RSQRTF:
19209 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
19211 case IX86_BUILTIN_SQRTSS:
19212 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19213 case IX86_BUILTIN_RSQRTSS:
19214 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19215 case IX86_BUILTIN_RCPSS:
19216 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19218 case IX86_BUILTIN_LOADUPS:
19219 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19221 case IX86_BUILTIN_STOREUPS:
19222 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19224 case IX86_BUILTIN_LOADHPS:
19225 case IX86_BUILTIN_LOADLPS:
19226 case IX86_BUILTIN_LOADHPD:
19227 case IX86_BUILTIN_LOADLPD:
19228 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19229 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19230 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19231 : CODE_FOR_sse2_loadlpd);
19232 arg0 = CALL_EXPR_ARG (exp, 0);
19233 arg1 = CALL_EXPR_ARG (exp, 1);
19234 op0 = expand_normal (arg0);
19235 op1 = expand_normal (arg1);
19236 tmode = insn_data[icode].operand[0].mode;
19237 mode0 = insn_data[icode].operand[1].mode;
19238 mode1 = insn_data[icode].operand[2].mode;
19240 op0 = force_reg (mode0, op0);
19241 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19242 if (optimize || target == 0
19243 || GET_MODE (target) != tmode
19244 || !register_operand (target, tmode))
19245 target = gen_reg_rtx (tmode);
19246 pat = GEN_FCN (icode) (target, op0, op1);
19252 case IX86_BUILTIN_STOREHPS:
19253 case IX86_BUILTIN_STORELPS:
19254 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19255 : CODE_FOR_sse_storelps);
19256 arg0 = CALL_EXPR_ARG (exp, 0);
19257 arg1 = CALL_EXPR_ARG (exp, 1);
19258 op0 = expand_normal (arg0);
19259 op1 = expand_normal (arg1);
19260 mode0 = insn_data[icode].operand[0].mode;
19261 mode1 = insn_data[icode].operand[1].mode;
19263 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19264 op1 = force_reg (mode1, op1);
19266 pat = GEN_FCN (icode) (op0, op1);
19272 case IX86_BUILTIN_MOVNTPS:
19273 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19274 case IX86_BUILTIN_MOVNTQ:
19275 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19277 case IX86_BUILTIN_LDMXCSR:
19278 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19279 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19280 emit_move_insn (target, op0);
19281 emit_insn (gen_sse_ldmxcsr (target));
19284 case IX86_BUILTIN_STMXCSR:
19285 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19286 emit_insn (gen_sse_stmxcsr (target));
19287 return copy_to_mode_reg (SImode, target);
19289 case IX86_BUILTIN_SHUFPS:
19290 case IX86_BUILTIN_SHUFPD:
19291 icode = (fcode == IX86_BUILTIN_SHUFPS
19292 ? CODE_FOR_sse_shufps
19293 : CODE_FOR_sse2_shufpd);
19294 arg0 = CALL_EXPR_ARG (exp, 0);
19295 arg1 = CALL_EXPR_ARG (exp, 1);
19296 arg2 = CALL_EXPR_ARG (exp, 2);
19297 op0 = expand_normal (arg0);
19298 op1 = expand_normal (arg1);
19299 op2 = expand_normal (arg2);
19300 tmode = insn_data[icode].operand[0].mode;
19301 mode0 = insn_data[icode].operand[1].mode;
19302 mode1 = insn_data[icode].operand[2].mode;
19303 mode2 = insn_data[icode].operand[3].mode;
19305 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19306 op0 = copy_to_mode_reg (mode0, op0);
19307 if ((optimize && !register_operand (op1, mode1))
19308 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19309 op1 = copy_to_mode_reg (mode1, op1);
19310 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19312 /* @@@ better error message */
19313 error ("mask must be an immediate");
19314 return gen_reg_rtx (tmode);
19316 if (optimize || target == 0
19317 || GET_MODE (target) != tmode
19318 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19319 target = gen_reg_rtx (tmode);
19320 pat = GEN_FCN (icode) (target, op0, op1, op2);
19326 case IX86_BUILTIN_PSHUFW:
19327 case IX86_BUILTIN_PSHUFD:
19328 case IX86_BUILTIN_PSHUFHW:
19329 case IX86_BUILTIN_PSHUFLW:
19330 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19331 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19332 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19333 : CODE_FOR_mmx_pshufw);
19334 arg0 = CALL_EXPR_ARG (exp, 0);
19335 arg1 = CALL_EXPR_ARG (exp, 1);
19336 op0 = expand_normal (arg0);
19337 op1 = expand_normal (arg1);
19338 tmode = insn_data[icode].operand[0].mode;
19339 mode1 = insn_data[icode].operand[1].mode;
19340 mode2 = insn_data[icode].operand[2].mode;
19342 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19343 op0 = copy_to_mode_reg (mode1, op0);
19344 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19346 /* @@@ better error message */
19347 error ("mask must be an immediate");
19351 || GET_MODE (target) != tmode
19352 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19353 target = gen_reg_rtx (tmode);
19354 pat = GEN_FCN (icode) (target, op0, op1);
19360 case IX86_BUILTIN_PSLLWI128:
19361 icode = CODE_FOR_ashlv8hi3;
19363 case IX86_BUILTIN_PSLLDI128:
19364 icode = CODE_FOR_ashlv4si3;
19366 case IX86_BUILTIN_PSLLQI128:
19367 icode = CODE_FOR_ashlv2di3;
19369 case IX86_BUILTIN_PSRAWI128:
19370 icode = CODE_FOR_ashrv8hi3;
19372 case IX86_BUILTIN_PSRADI128:
19373 icode = CODE_FOR_ashrv4si3;
19375 case IX86_BUILTIN_PSRLWI128:
19376 icode = CODE_FOR_lshrv8hi3;
19378 case IX86_BUILTIN_PSRLDI128:
19379 icode = CODE_FOR_lshrv4si3;
19381 case IX86_BUILTIN_PSRLQI128:
19382 icode = CODE_FOR_lshrv2di3;
19385 arg0 = CALL_EXPR_ARG (exp, 0);
19386 arg1 = CALL_EXPR_ARG (exp, 1);
19387 op0 = expand_normal (arg0);
19388 op1 = expand_normal (arg1);
19390 if (!CONST_INT_P (op1))
19392 error ("shift must be an immediate");
19395 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19396 op1 = GEN_INT (255);
19398 tmode = insn_data[icode].operand[0].mode;
19399 mode1 = insn_data[icode].operand[1].mode;
19400 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19401 op0 = copy_to_reg (op0);
19403 target = gen_reg_rtx (tmode);
19404 pat = GEN_FCN (icode) (target, op0, op1);
19410 case IX86_BUILTIN_PSLLW128:
19411 icode = CODE_FOR_ashlv8hi3;
19413 case IX86_BUILTIN_PSLLD128:
19414 icode = CODE_FOR_ashlv4si3;
19416 case IX86_BUILTIN_PSLLQ128:
19417 icode = CODE_FOR_ashlv2di3;
19419 case IX86_BUILTIN_PSRAW128:
19420 icode = CODE_FOR_ashrv8hi3;
19422 case IX86_BUILTIN_PSRAD128:
19423 icode = CODE_FOR_ashrv4si3;
19425 case IX86_BUILTIN_PSRLW128:
19426 icode = CODE_FOR_lshrv8hi3;
19428 case IX86_BUILTIN_PSRLD128:
19429 icode = CODE_FOR_lshrv4si3;
19431 case IX86_BUILTIN_PSRLQ128:
19432 icode = CODE_FOR_lshrv2di3;
19435 arg0 = CALL_EXPR_ARG (exp, 0);
19436 arg1 = CALL_EXPR_ARG (exp, 1);
19437 op0 = expand_normal (arg0);
19438 op1 = expand_normal (arg1);
19440 tmode = insn_data[icode].operand[0].mode;
19441 mode1 = insn_data[icode].operand[1].mode;
19443 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19444 op0 = copy_to_reg (op0);
19446 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
19447 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
19448 op1 = copy_to_reg (op1);
19450 target = gen_reg_rtx (tmode);
19451 pat = GEN_FCN (icode) (target, op0, op1);
19457 case IX86_BUILTIN_PSLLDQI128:
19458 case IX86_BUILTIN_PSRLDQI128:
19459 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19460 : CODE_FOR_sse2_lshrti3);
19461 arg0 = CALL_EXPR_ARG (exp, 0);
19462 arg1 = CALL_EXPR_ARG (exp, 1);
19463 op0 = expand_normal (arg0);
19464 op1 = expand_normal (arg1);
19465 tmode = insn_data[icode].operand[0].mode;
19466 mode1 = insn_data[icode].operand[1].mode;
19467 mode2 = insn_data[icode].operand[2].mode;
19469 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19471 op0 = copy_to_reg (op0);
19472 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19474 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19476 error ("shift must be an immediate");
19479 target = gen_reg_rtx (V2DImode);
19480 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19487 case IX86_BUILTIN_FEMMS:
19488 emit_insn (gen_mmx_femms ());
19491 case IX86_BUILTIN_PAVGUSB:
19492 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19494 case IX86_BUILTIN_PF2ID:
19495 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19497 case IX86_BUILTIN_PFACC:
19498 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19500 case IX86_BUILTIN_PFADD:
19501 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19503 case IX86_BUILTIN_PFCMPEQ:
19504 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19506 case IX86_BUILTIN_PFCMPGE:
19507 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19509 case IX86_BUILTIN_PFCMPGT:
19510 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19512 case IX86_BUILTIN_PFMAX:
19513 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19515 case IX86_BUILTIN_PFMIN:
19516 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19518 case IX86_BUILTIN_PFMUL:
19519 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19521 case IX86_BUILTIN_PFRCP:
19522 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19524 case IX86_BUILTIN_PFRCPIT1:
19525 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19527 case IX86_BUILTIN_PFRCPIT2:
19528 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19530 case IX86_BUILTIN_PFRSQIT1:
19531 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19533 case IX86_BUILTIN_PFRSQRT:
19534 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19536 case IX86_BUILTIN_PFSUB:
19537 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19539 case IX86_BUILTIN_PFSUBR:
19540 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19542 case IX86_BUILTIN_PI2FD:
19543 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19545 case IX86_BUILTIN_PMULHRW:
19546 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19548 case IX86_BUILTIN_PF2IW:
19549 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19551 case IX86_BUILTIN_PFNACC:
19552 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19554 case IX86_BUILTIN_PFPNACC:
19555 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19557 case IX86_BUILTIN_PI2FW:
19558 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19560 case IX86_BUILTIN_PSWAPDSI:
19561 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19563 case IX86_BUILTIN_PSWAPDSF:
19564 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19566 case IX86_BUILTIN_SQRTSD:
19567 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19568 case IX86_BUILTIN_LOADUPD:
19569 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19570 case IX86_BUILTIN_STOREUPD:
19571 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19573 case IX86_BUILTIN_MFENCE:
19574 emit_insn (gen_sse2_mfence ());
19576 case IX86_BUILTIN_LFENCE:
19577 emit_insn (gen_sse2_lfence ());
19580 case IX86_BUILTIN_CLFLUSH:
19581 arg0 = CALL_EXPR_ARG (exp, 0);
19582 op0 = expand_normal (arg0);
19583 icode = CODE_FOR_sse2_clflush;
19584 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19585 op0 = copy_to_mode_reg (Pmode, op0);
19587 emit_insn (gen_sse2_clflush (op0));
19590 case IX86_BUILTIN_MOVNTPD:
19591 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19592 case IX86_BUILTIN_MOVNTDQ:
19593 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19594 case IX86_BUILTIN_MOVNTI:
19595 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19597 case IX86_BUILTIN_LOADDQU:
19598 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19599 case IX86_BUILTIN_STOREDQU:
19600 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19602 case IX86_BUILTIN_MONITOR:
19603 arg0 = CALL_EXPR_ARG (exp, 0);
19604 arg1 = CALL_EXPR_ARG (exp, 1);
19605 arg2 = CALL_EXPR_ARG (exp, 2);
19606 op0 = expand_normal (arg0);
19607 op1 = expand_normal (arg1);
19608 op2 = expand_normal (arg2);
19610 op0 = copy_to_mode_reg (Pmode, op0);
19612 op1 = copy_to_mode_reg (SImode, op1);
19614 op2 = copy_to_mode_reg (SImode, op2);
19616 emit_insn (gen_sse3_monitor (op0, op1, op2));
19618 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19621 case IX86_BUILTIN_MWAIT:
19622 arg0 = CALL_EXPR_ARG (exp, 0);
19623 arg1 = CALL_EXPR_ARG (exp, 1);
19624 op0 = expand_normal (arg0);
19625 op1 = expand_normal (arg1);
19627 op0 = copy_to_mode_reg (SImode, op0);
19629 op1 = copy_to_mode_reg (SImode, op1);
19630 emit_insn (gen_sse3_mwait (op0, op1));
19633 case IX86_BUILTIN_LDDQU:
19634 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19637 case IX86_BUILTIN_PALIGNR:
19638 case IX86_BUILTIN_PALIGNR128:
19639 if (fcode == IX86_BUILTIN_PALIGNR)
19641 icode = CODE_FOR_ssse3_palignrdi;
19646 icode = CODE_FOR_ssse3_palignrti;
19649 arg0 = CALL_EXPR_ARG (exp, 0);
19650 arg1 = CALL_EXPR_ARG (exp, 1);
19651 arg2 = CALL_EXPR_ARG (exp, 2);
19652 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19653 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19654 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19655 tmode = insn_data[icode].operand[0].mode;
19656 mode1 = insn_data[icode].operand[1].mode;
19657 mode2 = insn_data[icode].operand[2].mode;
19658 mode3 = insn_data[icode].operand[3].mode;
19660 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19662 op0 = copy_to_reg (op0);
19663 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19665 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19667 op1 = copy_to_reg (op1);
19668 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19670 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19672 error ("shift must be an immediate");
19675 target = gen_reg_rtx (mode);
19676 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19683 case IX86_BUILTIN_MOVNTDQA:
19684 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19687 case IX86_BUILTIN_MOVNTSD:
19688 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19690 case IX86_BUILTIN_MOVNTSS:
19691 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19693 case IX86_BUILTIN_INSERTQ:
19694 case IX86_BUILTIN_EXTRQ:
19695 icode = (fcode == IX86_BUILTIN_EXTRQ
19696 ? CODE_FOR_sse4a_extrq
19697 : CODE_FOR_sse4a_insertq);
19698 arg0 = CALL_EXPR_ARG (exp, 0);
19699 arg1 = CALL_EXPR_ARG (exp, 1);
19700 op0 = expand_normal (arg0);
19701 op1 = expand_normal (arg1);
19702 tmode = insn_data[icode].operand[0].mode;
19703 mode1 = insn_data[icode].operand[1].mode;
19704 mode2 = insn_data[icode].operand[2].mode;
19705 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19706 op0 = copy_to_mode_reg (mode1, op0);
19707 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19708 op1 = copy_to_mode_reg (mode2, op1);
19709 if (optimize || target == 0
19710 || GET_MODE (target) != tmode
19711 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19712 target = gen_reg_rtx (tmode);
19713 pat = GEN_FCN (icode) (target, op0, op1);
19719 case IX86_BUILTIN_EXTRQI:
19720 icode = CODE_FOR_sse4a_extrqi;
19721 arg0 = CALL_EXPR_ARG (exp, 0);
19722 arg1 = CALL_EXPR_ARG (exp, 1);
19723 arg2 = CALL_EXPR_ARG (exp, 2);
19724 op0 = expand_normal (arg0);
19725 op1 = expand_normal (arg1);
19726 op2 = expand_normal (arg2);
19727 tmode = insn_data[icode].operand[0].mode;
19728 mode1 = insn_data[icode].operand[1].mode;
19729 mode2 = insn_data[icode].operand[2].mode;
19730 mode3 = insn_data[icode].operand[3].mode;
19731 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19732 op0 = copy_to_mode_reg (mode1, op0);
19733 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19735 error ("index mask must be an immediate");
19736 return gen_reg_rtx (tmode);
19738 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19740 error ("length mask must be an immediate");
19741 return gen_reg_rtx (tmode);
19743 if (optimize || target == 0
19744 || GET_MODE (target) != tmode
19745 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19746 target = gen_reg_rtx (tmode);
19747 pat = GEN_FCN (icode) (target, op0, op1, op2);
19753 case IX86_BUILTIN_INSERTQI:
19754 icode = CODE_FOR_sse4a_insertqi;
19755 arg0 = CALL_EXPR_ARG (exp, 0);
19756 arg1 = CALL_EXPR_ARG (exp, 1);
19757 arg2 = CALL_EXPR_ARG (exp, 2);
19758 arg3 = CALL_EXPR_ARG (exp, 3);
19759 op0 = expand_normal (arg0);
19760 op1 = expand_normal (arg1);
19761 op2 = expand_normal (arg2);
19762 op3 = expand_normal (arg3);
19763 tmode = insn_data[icode].operand[0].mode;
19764 mode1 = insn_data[icode].operand[1].mode;
19765 mode2 = insn_data[icode].operand[2].mode;
19766 mode3 = insn_data[icode].operand[3].mode;
19767 mode4 = insn_data[icode].operand[4].mode;
19769 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19770 op0 = copy_to_mode_reg (mode1, op0);
19772 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19773 op1 = copy_to_mode_reg (mode2, op1);
19775 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19777 error ("index mask must be an immediate");
19778 return gen_reg_rtx (tmode);
19780 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19782 error ("length mask must be an immediate");
19783 return gen_reg_rtx (tmode);
19785 if (optimize || target == 0
19786 || GET_MODE (target) != tmode
19787 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19788 target = gen_reg_rtx (tmode);
19789 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19795 case IX86_BUILTIN_VEC_INIT_V2SI:
19796 case IX86_BUILTIN_VEC_INIT_V4HI:
19797 case IX86_BUILTIN_VEC_INIT_V8QI:
19798 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19800 case IX86_BUILTIN_VEC_EXT_V2DF:
19801 case IX86_BUILTIN_VEC_EXT_V2DI:
19802 case IX86_BUILTIN_VEC_EXT_V4SF:
19803 case IX86_BUILTIN_VEC_EXT_V4SI:
19804 case IX86_BUILTIN_VEC_EXT_V8HI:
19805 case IX86_BUILTIN_VEC_EXT_V2SI:
19806 case IX86_BUILTIN_VEC_EXT_V4HI:
19807 case IX86_BUILTIN_VEC_EXT_V16QI:
19808 return ix86_expand_vec_ext_builtin (exp, target);
19810 case IX86_BUILTIN_VEC_SET_V2DI:
19811 case IX86_BUILTIN_VEC_SET_V4SF:
19812 case IX86_BUILTIN_VEC_SET_V4SI:
19813 case IX86_BUILTIN_VEC_SET_V8HI:
19814 case IX86_BUILTIN_VEC_SET_V4HI:
19815 case IX86_BUILTIN_VEC_SET_V16QI:
19816 return ix86_expand_vec_set_builtin (exp);
19818 case IX86_BUILTIN_INFQ:
19820 REAL_VALUE_TYPE inf;
19824 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
19826 tmp = validize_mem (force_const_mem (mode, tmp));
19829 target = gen_reg_rtx (mode);
19831 emit_move_insn (target, tmp);
19835 case IX86_BUILTIN_FABSQ:
19836 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
19838 case IX86_BUILTIN_COPYSIGNQ:
19839 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
19845 for (i = 0, d = bdesc_sse_3arg;
19846 i < ARRAY_SIZE (bdesc_sse_3arg);
19848 if (d->code == fcode)
19849 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19852 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19853 if (d->code == fcode)
19855 /* Compares are treated specially. */
19856 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19857 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19858 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19859 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19860 return ix86_expand_sse_compare (d, exp, target);
19862 return ix86_expand_binop_builtin (d->icode, exp, target);
19865 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19866 if (d->code == fcode)
19867 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19869 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19870 if (d->code == fcode)
19871 return ix86_expand_sse_comi (d, exp, target);
19873 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19874 if (d->code == fcode)
19875 return ix86_expand_sse_ptest (d, exp, target);
19877 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19878 if (d->code == fcode)
19879 return ix86_expand_crc32 (d->icode, exp, target);
19881 for (i = 0, d = bdesc_pcmpestr;
19882 i < ARRAY_SIZE (bdesc_pcmpestr);
19884 if (d->code == fcode)
19885 return ix86_expand_sse_pcmpestr (d, exp, target);
19887 for (i = 0, d = bdesc_pcmpistr;
19888 i < ARRAY_SIZE (bdesc_pcmpistr);
19890 if (d->code == fcode)
19891 return ix86_expand_sse_pcmpistr (d, exp, target);
19893 gcc_unreachable ();
19896 /* Returns a function decl for a vectorized version of the builtin function
19897 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19898 if it is not available. */
19901 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
19904   enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings exist; bail out unless both the result
   and the argument types are vectors.  */
19907   if (TREE_CODE (type_out) != VECTOR_TYPE
19908       || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element mode and lane count of the result (out_*) and argument (in_*)
   vectors; each case below matches an exact mode/width pair.  */
19911   out_mode = TYPE_MODE (TREE_TYPE (type_out));
19912   out_n = TYPE_VECTOR_SUBPARTS (type_out);
19913   in_mode = TYPE_MODE (TREE_TYPE (type_in));
19914   in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* sqrt on 2 doubles -> SQRTPD.  */
19918     case BUILT_IN_SQRT:
19919       if (out_mode == DFmode && out_n == 2
19920 	  && in_mode == DFmode && in_n == 2)
19921 	return ix86_builtins[IX86_BUILTIN_SQRTPD];
/* sqrtf on 4 floats -> SQRTPS.  */
19924     case BUILT_IN_SQRTF:
19925       if (out_mode == SFmode && out_n == 4
19926 	  && in_mode == SFmode && in_n == 4)
19927 	return ix86_builtins[IX86_BUILTIN_SQRTPS];
/* lrint of 2 doubles producing 4 ints: done via the pack-to-SFIX
   helper builtin.  */
19930     case BUILT_IN_LRINT:
19931       if (out_mode == SImode && out_n == 4
19932 	  && in_mode == DFmode && in_n == 2)
19933 	return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
/* lrintf of 4 floats -> CVTPS2DQ (rounds per MXCSR).
   NOTE(review): the fall-through/default handling returning NULL_TREE is
   elided from this excerpt.  */
19936     case BUILT_IN_LRINTF:
19937       if (out_mode == SImode && out_n == 4
19938 	  && in_mode == SFmode && in_n == 4)
19939 	return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19949 /* Returns a decl of a function that implements conversion of the
19950 input vector of type TYPE, or NULL_TREE if it is not available. */
19953 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
/* Conversions are only provided for vector types.  */
19955   if (TREE_CODE (type) != VECTOR_TYPE)
/* int vector -> float vector: CVTDQ2PS.  NOTE(review): the enclosing
   switch on CODE (FLOAT_EXPR arm) and its mode guards are partially
   elided from this excerpt.  */
19961       switch (TYPE_MODE (type))
19964 	  return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float vector -> int vector with truncation: CVTTPS2DQ.  */
19969     case FIX_TRUNC_EXPR:
19970       switch (TYPE_MODE (type))
19973 	  return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19983 /* Returns a code for a target-specific builtin that implements
19984 reciprocal of the function, or NULL_TREE if not available. */
19987 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
19988 			 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations (rsqrt) are only valid when the user has
   opted into fast, non-trapping, finite SSE math via -mrecip plus the
   unsafe-math flags; otherwise refuse.  */
19990   if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
19991 	 && flag_finite_math_only && !flag_trapping_math
19992 	 && flag_unsafe_math_optimizations))
/* Machine dependent builtins (md_fn case — assumed; the dispatch on
   MD_FN is elided from this excerpt).  */
19996   /* Machine dependent builtins.  */
19999       /* Vectorized version of sqrt to rsqrt conversion.  */
20000     case IX86_BUILTIN_SQRTPS:
20001       return ix86_builtins[IX86_BUILTIN_RSQRTPS];
20007   /* Normal builtins.  */
20010       /* Sqrt to rsqrt conversion.  */
20011     case BUILT_IN_SQRTF:
20012       return ix86_builtins[IX86_BUILTIN_RSQRTF];
20019 /* Store OPERAND to the memory after reload is completed. This means
20020 that we can't easily use assign_stack_local. */
20022 ix86_force_to_memory (enum machine_mode mode, rtx operand)
/* Only legal after reload: we manipulate the hard stack pointer
   directly below.  */
20026   gcc_assert (reload_completed);
/* With a red zone we may store below the stack pointer without
   adjusting it.  */
20027   if (TARGET_RED_ZONE)
20029       result = gen_rtx_MEM (mode,
20030 			    gen_rtx_PLUS (Pmode,
20032 					  GEN_INT (-RED_ZONE_SIZE)));
20033       emit_move_insn (result, operand);
/* 64-bit without red zone: push the value as one DImode word using a
   pre-decrement of the stack pointer.  */
20035   else if (!TARGET_RED_ZONE && TARGET_64BIT)
20041 	  operand = gen_lowpart (DImode, operand);
20045 		     gen_rtx_SET (VOIDmode,
20046 				  gen_rtx_MEM (DImode,
20047 					       gen_rtx_PRE_DEC (DImode,
20048 							stack_pointer_rtx)),
20052 	  gcc_unreachable ();
20054       result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: DImode values are split into two SImode halves and
   pushed individually.  NOTE(review): the enclosing switch on MODE is
   elided from this excerpt.  */
20063 	    split_di (&operand, 1, operands, operands + 1);
20065 		       gen_rtx_SET (VOIDmode,
20066 				    gen_rtx_MEM (SImode,
20067 						 gen_rtx_PRE_DEC (Pmode,
20068 							stack_pointer_rtx)),
20071 		       gen_rtx_SET (VOIDmode,
20072 				    gen_rtx_MEM (SImode,
20073 						 gen_rtx_PRE_DEC (Pmode,
20074 							stack_pointer_rtx)),
20079 	  /* Store HImodes as SImodes.  */
20080 	  operand = gen_lowpart (SImode, operand);
20084 		     gen_rtx_SET (VOIDmode,
20085 				  gen_rtx_MEM (GET_MODE (operand),
20086 					       gen_rtx_PRE_DEC (SImode,
20087 							stack_pointer_rtx)),
20091 	  gcc_unreachable ();
/* The pushed value now lives at the (new) top of stack.  */
20093       result = gen_rtx_MEM (mode, stack_pointer_rtx);
20098 /* Free operand from the memory. */
20100 ix86_free_from_memory (enum machine_mode mode)
/* The red-zone store in ix86_force_to_memory did not move the stack
   pointer, so there is nothing to release in that case.  */
20102   if (!TARGET_RED_ZONE)
/* Size of the slot to pop: DImode (or any mode on 64-bit) occupies a
   full 8-byte word — the size computation itself is elided from this
   excerpt.  */
20106       if (mode == DImode || TARGET_64BIT)
20110       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
20111 	 to pop or add instruction if registers are available.  */
20112       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20113 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
20118 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20119 QImode must go into class Q_REGS.
20120 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20121 movdf to do mem-to-mem moves through integer regs. */
20123 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
20125   enum machine_mode mode = GET_MODE (x);
20127   /* We're only allowed to return a subclass of CLASS.  Many of the
20128      following checks fail for NO_REGS, so eliminate that early.  */
20129   if (regclass == NO_REGS)
20132   /* All classes can load zeros.  */
20133   if (x == CONST0_RTX (mode))
20136   /* Force constants into memory if we are loading a (nonzero) constant into
20137      an MMX or SSE register.  This is because there are no MMX/SSE instructions
20138      to load from a constant.  */
20140       && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
20143   /* Prefer SSE regs only, if we can use them for math.  */
20144   if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
20145     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20147   /* Floating-point constants need more complex checks.  */
20148   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20150       /* General regs can load everything.  */
20151       if (reg_class_subset_p (regclass, GENERAL_REGS))
20154       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
20155 	 zero above.  We only want to wind up preferring 80387 registers if
20156 	 we plan on doing computation with them.  */
20158 	  && standard_80387_constant_p (x))
20160 	  /* Limit class to non-sse.  */
/* Narrow each mixed x87/SSE class to its pure x87 counterpart so the
   constant is materialized via the 387 constant instructions.  */
20161 	  if (regclass == FLOAT_SSE_REGS)
20163 	  if (regclass == FP_TOP_SSE_REGS)
20165 	  if (regclass == FP_SECOND_SSE_REGS)
20166 	    return FP_SECOND_REG;
20167 	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
20174   /* Generally when we see PLUS here, it's the function invariant
20175      (plus soft-fp const_int).  Which can only be computed into general
20177   if (GET_CODE (x) == PLUS)
20178     return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
20180   /* QImode constants are easy to load, but non-constant QImode data
20181      must go into Q_REGS.  */
20182   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20184       if (reg_class_subset_p (regclass, Q_REGS))
20186       if (reg_class_subset_p (Q_REGS, regclass))
20194 /* Discourage putting floating-point values in SSE registers unless
20195 SSE math is being used, and likewise for the 387 registers. */
20197 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20199   enum machine_mode mode = GET_MODE (x);
20201   /* Restrict the output reload class to the register bank that we are doing
20202      math on.  If we would like not to return a subset of CLASS, reject this
20203      alternative: if reload cannot do this, it will still use its choice.  */
20204   mode = GET_MODE (x);
/* SSE math: only allow pure SSE classes for SSE float modes.  */
20205   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20206     return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* x87 math: strip the SSE half out of mixed classes, then require a
   pure float class.  */
20208   if (X87_FLOAT_MODE_P (mode))
20210       if (regclass == FP_TOP_SSE_REGS)
20212       else if (regclass == FP_SECOND_SSE_REGS)
20213 	return FP_SECOND_REG;
20215       return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20221 /* If we are copying between general and FP registers, we need a memory
20222 location. The same is true for SSE and MMX registers.
20224 To optimize register_move_cost performance, allow inline variant.
20226 The macro can't work reliably when one of the CLASSES is class containing
20227 registers from multiple units (SSE, MMX, integer). We avoid this by never
20228 combining those units in single alternative in the machine description.
20229 Ensure that this constraint holds to avoid unexpected surprises.
20231 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20232 enforce these sanity checks. */
20235 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20236 				enum machine_mode mode, int strict)
/* Sanity check: classes mixing registers from different units (x87,
   SSE, MMX, integer) are not expected here; tolerated only when
   STRICT is false (i.e. from register_move_cost).  */
20238   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20239       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20240       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20241       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20242       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20243       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20245       gcc_assert (!strict);
/* x87 <-> anything else always goes through memory.  */
20249   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20252   /* ??? This is a lie.  We do have moves between mmx/general, and for
20253      mmx/sse2.  But by saying we need secondary memory we discourage the
20254      register allocator from using the mmx registers unless needed.  */
20255   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20258   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20260       /* SSE1 doesn't have any direct moves from other classes.  */
20264       /* If the target says that inter-unit moves are more expensive
20265 	 than moving through memory, then don't generate them.  */
20266       if (!TARGET_INTER_UNIT_MOVES)
20269       /* Between SSE and general, we have moves no larger than word size.  */
20270       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper used by the SECONDARY_MEMORY_NEEDED macro; the
   inline variant exists so register_move_cost can inline it.  */
20278 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20279 			      enum machine_mode mode, int strict)
20281   return inline_secondary_memory_needed (class1, class2, mode, strict);
20284 /* Return true if the registers in CLASS cannot represent the change from
20285 modes FROM to TO. */
20288 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20289 			       enum reg_class regclass)
20294   /* x87 registers can't do subreg at all, as all values are reformatted
20295      to extended precision.  */
20296   if (MAYBE_FLOAT_CLASS_P (regclass))
20299   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20301       /* Vector registers do not support QI or HImode loads.  If we don't
20302 	 disallow a change to these modes, reload will assume it's ok to
20303 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
20304 	 the vec_dupv4hi pattern.  */
20305       if (GET_MODE_SIZE (from) < 4)
20308       /* Vector registers do not support subreg with nonzero offsets, which
20309 	 are otherwise valid for integer registers.  Since we can't see
20310 	 whether we have a nonzero offset from here, prohibit all
20311 	 nonparadoxical subregs changing size.  */
20312       if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20319 /* Return the cost of moving data of mode M between a
20320 register and memory. A value of 2 is the default; this cost is
20321 relative to those in `REGISTER_MOVE_COST'.
20323 This function is used extensively by register_move_cost that is used to
20324 build tables at startup. Make it inline in this case.
20325 When IN is 2, return maximum of in and out move cost.
20327 If moving between registers and memory is more expensive than
20328 between two registers, you should define this macro to express the
20331 Model also increased moving costs of QImode registers in non
20335 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 classes use the fp_load/fp_store cost tables, indexed by mode
   (index computation elided from this excerpt).  */
20339   if (FLOAT_CLASS_P (regclass))
/* in == 2 means "max of load and store cost" — see function comment.  */
20357 	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
20358       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: sse_load/sse_store tables, indexed by operand size.  */
20360   if (SSE_CLASS_P (regclass))
20363       switch (GET_MODE_SIZE (mode))
20378 	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
20379       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: mmx_load/mmx_store tables, indexed by operand size.  */
20381   if (MMX_CLASS_P (regclass))
20384       switch (GET_MODE_SIZE (mode))
20396 	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
20397       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes, by size.  Size-1 case: QImode is cheap only in
   Q_REGS (or on 64-bit where every reg has a low byte).  */
20399   switch (GET_MODE_SIZE (mode))
20402       if (Q_CLASS_P (regclass) || TARGET_64BIT)
20405 	    return ix86_cost->int_store[0];
/* Prefer movzbl over a plain byte load when partial-register stalls
   matter and we are not optimizing for size.  */
20406 	  if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
20407 	    cost = ix86_cost->movzbl_load;
20409 	    cost = ix86_cost->int_load[0];
20411 	    return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q classes on 32-bit: byte access needs an extra shuffle,
   reflected by the +4 on the store side.  */
20417 	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
20419 	      return ix86_cost->movzbl_load;
20421 	      return ix86_cost->int_store[0] + 4;
20426 	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
20427       return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20429       /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
20430       if (mode == TFmode)
20433 	cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
20435 	cost = ix86_cost->int_load[2];
20437 	cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words the mode occupies.  */
20438       return (cost * (((int) GET_MODE_SIZE (mode)
20439 		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper used by the MEMORY_MOVE_COST macro; see
   inline_memory_move_cost for the IN parameter semantics (2 = max of
   load and store).  */
20444 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20446   return inline_memory_move_cost (mode, regclass, in);
20450 /* Return the cost of moving data from a register in class CLASS1 to
20451 one in class CLASS2.
20453 It is not required that the cost always equal 2 when FROM is the same as TO;
20454 on some machines it is expensive to move between registers if they are not
20455 general registers. */
20458 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20459 			 enum reg_class class2)
20461   /* In case we require secondary memory, compute cost of the store followed
20462      by load.  In order to avoid bad register allocation choices, we need
20463      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
20465   if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* in == 2: take max of load and store cost for each side.  */
20469       cost += inline_memory_move_cost (mode, class1, 2);
20470       cost += inline_memory_move_cost (mode, class2, 2);
20472       /* In case of copying from general_purpose_register we may emit multiple
20473 	 stores followed by single load causing memory size mismatch stall.
20474 	 Count this as arbitrarily high cost of 20.  */
20475       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20478   /* In the case of FP/MMX moves, the registers actually overlap, and we
20479      have to switch modes in order to treat them differently.  */
20480   if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20481       || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20487   /* Moves between SSE/MMX and integer unit are expensive.  */
20488   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20489       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20491     /* ??? By keeping returned value relatively high, we limit the number
20492        of moves between integer and MMX/SSE registers for all targets.
20493        Additionally, high value prevents problem with x86_modes_tieable_p(),
20494        where integer modes in MMX/SSE registers are not tieable
20495        because of missing QImode and HImode moves to, from or between
20496        MMX/SSE registers.  */
20497     return MAX (ix86_cost->mmxsse_to_integer, 8);
/* Same-unit moves: use the per-unit cost from the tuning tables.  */
20499   if (MAYBE_FLOAT_CLASS_P (class1))
20500     return ix86_cost->fp_move;
20501   if (MAYBE_SSE_CLASS_P (class1))
20502     return ix86_cost->sse_move;
20503   if (MAYBE_MMX_CLASS_P (class1))
20504     return ix86_cost->mmx_move;
20508 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
/* NOTE(review): interior lines are elided in this extract; several
   return statements and braces are not visible here.  */
20511 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20513   /* Flags and only flags can only hold CCmode values.  */
20514   if (CC_REGNO_P (regno))
20515     return GET_MODE_CLASS (mode) == MODE_CC;
20516   if (GET_MODE_CLASS (mode) == MODE_CC
20517       || GET_MODE_CLASS (mode) == MODE_RANDOM
20518       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20520   if (FP_REGNO_P (regno))
20521     return VALID_FP_MODE_P (mode);
20522   if (SSE_REGNO_P (regno))
20524       /* We implement the move patterns for all vector modes into and
20525 	 out of SSE registers, even when no operation instructions
   exist for that mode (elided continuation of this comment).  */
20527       return (VALID_SSE_REG_MODE (mode)
20528 	      || VALID_SSE2_REG_MODE (mode)
20529 	      || VALID_MMX_REG_MODE (mode)
20530 	      || VALID_MMX_REG_MODE_3DNOW (mode));
20532   if (MMX_REGNO_P (regno))
20534       /* We implement the move patterns for 3DNOW modes even in MMX mode,
20535 	 so if the register is available at all, then we can move data of
20536 	 the given mode into or out of it.  */
20537       return (VALID_MMX_REG_MODE (mode)
20538 	      || VALID_MMX_REG_MODE_3DNOW (mode));
/* Remaining cases apply to general-purpose registers.  */
20541   if (mode == QImode)
20543       /* Take care for QImode values - they can be in non-QI regs,
20544 	 but then they do cause partial register stalls.  */
20545       if (regno < 4 || TARGET_64BIT)
20547       if (!TARGET_PARTIAL_REG_STALL)
/* During reload we must accept QImode in high regs to avoid spills.  */
20549       return reload_in_progress || reload_completed;
20551   /* We handle both integer and floats in the general purpose registers.  */
20552   else if (VALID_INT_MODE_P (mode))
20554   else if (VALID_FP_MODE_P (mode))
20556   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
20557      on to use that value in smaller contexts, this can easily force a
20558      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
20559      supporting DImode, allow it.  */
20560   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20566 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
20567    tieable integer mode.  */
/* NOTE(review): the switch over MODE is elided in this extract; only
   two of its return statements are visible.  The first presumably
   covers the small-integer cases (QI/HI — confirm against full source),
   the second the 64-bit-only case.  */
20570 ix86_tieable_integer_mode_p (enum machine_mode mode)
20579       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20582       return TARGET_64BIT;
20589 /* Return true if MODE1 is accessible in a register that can hold MODE2
20590    without copying.  That is, all register classes that can hold MODE2
20591    can also hold MODE1.  */
/* NOTE(review): braces and some statements are elided in this extract.  */
20594 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20596   if (mode1 == mode2)
/* Two tieable integer modes always tie with each other.  */
20599   if (ix86_tieable_integer_mode_p (mode1)
20600       && ix86_tieable_integer_mode_p (mode2))
20603   /* MODE2 being XFmode implies fp stack or general regs, which means we
20604      can tie any smaller floating point modes to it.  Note that we do not
20605      tie this with TFmode.  */
20606   if (mode2 == XFmode)
20607     return mode1 == SFmode || mode1 == DFmode;
20609   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20610      that we can tie it with SFmode.  */
20611   if (mode2 == DFmode)
20612     return mode1 == SFmode;
20614   /* If MODE2 is only appropriate for an SSE register, then tie with
20615      any other mode acceptable to SSE registers.  */
20616   if (GET_MODE_SIZE (mode2) == 16
20617       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20618     return (GET_MODE_SIZE (mode1) == 16
20619 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20621   /* If MODE2 is appropriate for an MMX register, then tie
20622      with any other mode acceptable to MMX registers.  */
20623   if (GET_MODE_SIZE (mode2) == 8
20624       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20625     return (GET_MODE_SIZE (mode1) == 8
20626 	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20631 /* Compute a (partial) cost for rtx X.  Return true if the complete
20632    cost has been computed, and false if subexpressions should be
20633    scanned.  In either case, *TOTAL contains the cost result.  */
/* NOTE(review): this is the TARGET_RTX_COSTS hook.  The switch on CODE
   and most case labels/braces are elided in this extract; only selected
   statements are visible.  */
20636 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20638   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20639   enum machine_mode mode = GET_MODE (x);
/* Constant costs: penalize immediates that need extra instructions.  */
20647       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20649       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
/* NOTE(review): likely bug — `!GET_CODE (x) != LABEL_REF` negates the
   code before comparing, so the subexpression is effectively always
   true and the LABEL_REF exclusion never fires.  Intent was almost
   certainly `GET_CODE (x) != LABEL_REF`.  Needs confirmation against
   upstream GCC history before changing.  */
20651       else if (flag_pic && SYMBOLIC_CONST (x)
20653 		   || (!GET_CODE (x) != LABEL_REF
20654 		       && (GET_CODE (x) != SYMBOL_REF
20655 		           || !SYMBOL_REF_LOCAL_P (x)))))
/* Floating-point constants: x87 special constants are cheap.  */
20662       if (mode == VOIDmode)
20665       switch (standard_80387_constant_p (x))
20670 	default: /* Other constants */
20675       /* Start with (MEM (SYMBOL_REF)), since that's where
20676 	 it'll probably end up.  Add a penalty for size.  */
20677       *total = (COSTS_N_INSNS (1)
20678 		+ (flag_pic != 0 && !TARGET_64BIT)
20679 		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20685       /* The zero extensions is often completely free on x86_64, so make
20686 	 it as cheap as possible.  */
20687       if (TARGET_64BIT && mode == DImode
20688 	  && GET_MODE (XEXP (x, 0)) == SImode)
20690       else if (TARGET_ZERO_EXTEND_WITH_AND)
20691 	*total = ix86_cost->add;
20693 	*total = ix86_cost->movzx;
/* SIGN_EXTEND case.  */
20697       *total = ix86_cost->movsx;
/* Shift-by-constant: small left shifts may be done with LEA.  */
20701       if (CONST_INT_P (XEXP (x, 1))
20702 	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20704 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20707 	      *total = ix86_cost->add;
20710 	  if ((value == 2 || value == 3)
20711 	      && ix86_cost->lea <= ix86_cost->shift_const)
20713 	      *total = ix86_cost->lea;
/* 64-bit shifts on 32-bit targets need a multi-insn sequence.  */
20723       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20725 	  if (CONST_INT_P (XEXP (x, 1)))
20727 	      if (INTVAL (XEXP (x, 1)) > 32)
20728 		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20730 		*total = ix86_cost->shift_const * 2;
20734 	      if (GET_CODE (XEXP (x, 1)) == AND)
20735 		*total = ix86_cost->shift_var * 2;
20737 		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20742 	  if (CONST_INT_P (XEXP (x, 1)))
20743 	    *total = ix86_cost->shift_const;
20745 	    *total = ix86_cost->shift_var;
/* MULT: floating-point multiply costs first.  */
20750       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20752 	  /* ??? SSE scalar cost should be used here.  */
20753 	  *total = ix86_cost->fmul;
20756       else if (X87_FLOAT_MODE_P (mode))
20758 	  *total = ix86_cost->fmul;
20761       else if (FLOAT_MODE_P (mode))
20763 	  /* ??? SSE vector cost should be used here.  */
20764 	  *total = ix86_cost->fmul;
/* Integer multiply: cost scales with the number of set bits in a
   constant multiplier (nbits), per the mult_init/mult_bit model.  */
20769 	  rtx op0 = XEXP (x, 0);
20770 	  rtx op1 = XEXP (x, 1);
20772 	  if (CONST_INT_P (XEXP (x, 1)))
20774 	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20775 	      for (nbits = 0; value != 0; value &= value - 1)
20779 	    /* This is arbitrary.  */
20782 	  /* Compute costs correctly for widening multiplication.  */
20783 	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
20784 	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20785 	         == GET_MODE_SIZE (mode))
20787 	      int is_mulwiden = 0;
20788 	      enum machine_mode inner_mode = GET_MODE (op0);
20790 	      if (GET_CODE (op0) == GET_CODE (op1))
20791 		is_mulwiden = 1, op1 = XEXP (op1, 0);
20792 	      else if (CONST_INT_P (op1))
20794 		  if (GET_CODE (op0) == SIGN_EXTEND)
20795 		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20798 		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* For a widening multiply, cost the narrow inner mode instead.  */
20802 		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20805 	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20806 		    + nbits * ix86_cost->mult_bit
20807 	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
/* DIV/MOD and friends.  */
20816       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20817 	/* ??? SSE cost should be used here.  */
20818 	*total = ix86_cost->fdiv;
20819       else if (X87_FLOAT_MODE_P (mode))
20820 	*total = ix86_cost->fdiv;
20821       else if (FLOAT_MODE_P (mode))
20822 	/* ??? SSE vector cost should be used here.  */
20823 	*total = ix86_cost->fdiv;
20825 	*total = ix86_cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize address-like forms that a single LEA can compute.  */
20829       if (GET_MODE_CLASS (mode) == MODE_INT
20830 	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20832 	  if (GET_CODE (XEXP (x, 0)) == PLUS
20833 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20834 	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20835 	      && CONSTANT_P (XEXP (x, 1)))
20837 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20838 	      if (val == 2 || val == 4 || val == 8)
20840 		  *total = ix86_cost->lea;
20841 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20842 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20844 		  *total += rtx_cost (XEXP (x, 1), outer_code);
20848 	  else if (GET_CODE (XEXP (x, 0)) == MULT
20849 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20851 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20852 	      if (val == 2 || val == 4 || val == 8)
20854 		  *total = ix86_cost->lea;
20855 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20856 		  *total += rtx_cost (XEXP (x, 1), outer_code);
20860 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
20862 	      *total = ix86_cost->lea;
20863 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20864 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20865 	      *total += rtx_cost (XEXP (x, 1), outer_code);
/* PLUS/MINUS in floating modes.  */
20872       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20874 	  /* ??? SSE cost should be used here.  */
20875 	  *total = ix86_cost->fadd;
20878       else if (X87_FLOAT_MODE_P (mode))
20880 	  *total = ix86_cost->fadd;
20883       else if (FLOAT_MODE_P (mode))
20885 	  /* ??? SSE vector cost should be used here.  */
20886 	  *total = ix86_cost->fadd;
/* AND/IOR/XOR: DImode on 32-bit takes two word operations; the shift
   terms double-count subexpression cost when the operand mode differs.  */
20894       if (!TARGET_64BIT && mode == DImode)
20896 	  *total = (ix86_cost->add * 2
20897 		    + (rtx_cost (XEXP (x, 0), outer_code)
20898 		       << (GET_MODE (XEXP (x, 0)) != DImode))
20899 		    + (rtx_cost (XEXP (x, 1), outer_code)
20900 		       << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG of floating modes.  */
20906       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20908 	  /* ??? SSE cost should be used here.  */
20909 	  *total = ix86_cost->fchs;
20912       else if (X87_FLOAT_MODE_P (mode))
20914 	  *total = ix86_cost->fchs;
20917       else if (FLOAT_MODE_P (mode))
20919 	  /* ??? SSE vector cost should be used here.  */
20920 	  *total = ix86_cost->fchs;
/* NOT (one's complement).  */
20926       if (!TARGET_64BIT && mode == DImode)
20927 	*total = ix86_cost->add * 2;
20929 	*total = ix86_cost->add;
/* COMPARE of a single-bit extract against zero -> test[bwl].  */
20933       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20934 	  && XEXP (XEXP (x, 0), 1) == const1_rtx
20935 	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20936 	  && XEXP (x, 1) == const0_rtx)
20938 	  /* This kind of construct is implemented using test[bwl].
20939 	     Treat it as if we had an AND.  */
20940 	  *total = (ix86_cost->add
20941 		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20942 		    + rtx_cost (const1_rtx, outer_code));
/* FLOAT_EXTEND is free for SSE math (elided case label).  */
20948       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS.  */
20953       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20954 	/* ??? SSE cost should be used here.  */
20955 	*total = ix86_cost->fabs;
20956       else if (X87_FLOAT_MODE_P (mode))
20957 	*total = ix86_cost->fabs;
20958       else if (FLOAT_MODE_P (mode))
20959 	/* ??? SSE vector cost should be used here.  */
20960 	*total = ix86_cost->fabs;
/* SQRT.  */
20964       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20965 	/* ??? SSE cost should be used here.  */
20966 	*total = ix86_cost->fsqrt;
20967       else if (X87_FLOAT_MODE_P (mode))
20968 	*total = ix86_cost->fsqrt;
20969       else if (FLOAT_MODE_P (mode))
20970 	/* ??? SSE vector cost should be used here.  */
20971 	*total = ix86_cost->fsqrt;
/* UNSPEC_TP (thread pointer) is cheap.  */
20975       if (XINT (x, 1) == UNSPEC_TP)
/* Monotone counter used to generate unique local labels (LPC$n, L<n>$lz)
   for each Mach-O lazy symbol stub emitted below.  */
20986 static int current_machopic_label_num;
20988 /* Given a symbol name and its associated stub, write out the
20989    definition of the stub.  */
/* NOTE(review): Darwin/Mach-O only (guarded by TARGET_MACHO in the full
   file); interior lines and the MACHOPIC_ATT_STUB/PIC branches are
   partially elided in this extract.  */
20992 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20994   unsigned int length;
20995   char *binder_name, *symbol_name, lazy_ptr_name[32];
20996   int label = ++current_machopic_label_num;
20998   /* For 64-bit we shouldn't get here.  */
20999   gcc_assert (!TARGET_64BIT);
21001   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
21002   symb = (*targetm.strip_name_encoding) (symb);
21004   length = strlen (stub);
21005   binder_name = alloca (length + 32);
21006   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
21008   length = strlen (symb);
21009   symbol_name = alloca (length + 32);
21010   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21012   sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the PIC or non-PIC stub section, then emit the stub body.  */
21015     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
21017     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
21019   fprintf (file, "%s:\n", stub);
21020   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: materialize PC in %eax, load the lazy pointer, jump.  */
21024       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
21025       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
21026       fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC stub: jump through the lazy pointer directly.  */
21029     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder trampoline: push the lazy pointer address and enter dyld.  */
21031   fprintf (file, "%s:\n", binder_name);
21035       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
21036       fprintf (file, "\tpushl\t%%eax\n");
21039     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
21041   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy symbol pointer, initially pointing at the binder.  */
21043   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
21044   fprintf (file, "%s:\n", lazy_ptr_name);
21045   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21046   fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: delegates to the generic
   Darwin end-of-file handling (stub and non-lazy-pointer emission).
   NOTE(review): return type, braces and any additional statements are
   elided in this extract.  */
21050 darwin_x86_file_end (void)
21052   darwin_file_end ();
21055 #endif /* TARGET_MACHO */
21057 /* Order the registers for register allocator.  */
/* Fills reg_alloc_order[] with a preference order: call-clobbered GPRs
   first, then call-saved GPRs, then x87/SSE (order depending on
   TARGET_SSE_MATH), then MMX; the remainder is zero-padded.
   NOTE(review): the declarations of i/pos and braces are elided.  */
21060 x86_order_regs_for_local_alloc (void)
21065    /* First allocate the local general purpose registers.  */
21066   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21067     if (GENERAL_REGNO_P (i) && call_used_regs[i])
21068       reg_alloc_order [pos++] = i;
21070    /* Global general purpose registers.  */
21071   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21072     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
21073       reg_alloc_order [pos++] = i;
21075   /* x87 registers come first in case we are doing FP math
   using them (elided continuation of this comment).  */
21077   if (!TARGET_SSE_MATH)
21078     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21079       reg_alloc_order [pos++] = i;
21081   /* SSE registers.  */
21082   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
21083     reg_alloc_order [pos++] = i;
21084   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
21085     reg_alloc_order [pos++] = i;
21087    /* x87 registers.  */
21088   if (TARGET_SSE_MATH)
21089     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21090       reg_alloc_order [pos++] = i;
21092   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
21093     reg_alloc_order [pos++] = i;
21095   /* Initialize the rest of array as we do not allocate some registers
   at all (elided continuation of this comment).  */
21097   while (pos < FIRST_PSEUDO_REGISTER)
21098     reg_alloc_order [pos++] = 0;
21101 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
21102    struct attribute_spec.handler.  */
/* Validates that the attribute is applied to a struct/union type and
   that it does not conflict with the opposite attribute already present;
   on any problem, warns and sets *no_add_attrs.
   NOTE(review): return type, braces and the TYPE_DECL/else branches are
   partially elided in this extract.  */
21104 ix86_handle_struct_attribute (tree *node, tree name,
21105 			      tree args ATTRIBUTE_UNUSED,
21106 			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Resolve NODE to the type being annotated.  */
21109   if (DECL_P (*node))
21111       if (TREE_CODE (*node) == TYPE_DECL)
21112 	type = &TREE_TYPE (*node);
21117   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
21118 		 || TREE_CODE (*type) == UNION_TYPE)))
21120       warning (OPT_Wattributes, "%qs attribute ignored",
21121 	       IDENTIFIER_POINTER (name));
21122       *no_add_attrs = true;
/* Reject ms_struct+gcc_struct on the same type.  */
21125   else if ((is_attribute_p ("ms_struct", name)
21126 	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
21127 	   || ((is_attribute_p ("gcc_struct", name)
21128 		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
21130       warning (OPT_Wattributes, "%qs incompatible attribute ignored",
21131                IDENTIFIER_POINTER (name));
21132       *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: use MS bit-field layout when the
   target default requests it (unless overridden by "gcc_struct"), or
   when the type carries an explicit "ms_struct" attribute.  */
21139 ix86_ms_bitfield_layout_p (tree record_type)
21141   return (TARGET_MS_BITFIELD_LAYOUT &&
21142 	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21143     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
21146 /* Returns an expression indicating where the this parameter is
21147    located on entry to the FUNCTION.  */
/* 64-bit: `this' is in the first integer argument register (shifted by
   one slot when the return value is an aggregate returned via hidden
   pointer — AGGR indexes into the parameter-register array).
   32-bit regparm/fastcall: in a register; otherwise on the stack.
   NOTE(review): interior lines (TARGET_64BIT test, regno selection) are
   elided in this extract.  */
21150 x86_this_parameter (tree function)
21152   tree type = TREE_TYPE (function);
21153   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
21157       const int *parm_regs;
21159       if (TARGET_64BIT_MS_ABI)
21160 	parm_regs = x86_64_ms_abi_int_parameter_registers;
21162 	parm_regs = x86_64_int_parameter_registers;
21163       return gen_rtx_REG (DImode, parm_regs[aggr]);
21166   if (ix86_function_regparm (type, function) > 0
21167       && !type_has_variadic_args_p (type))
21170       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
21172       return gen_rtx_REG (SImode, regno);
/* Stack case: just above the return address (and the hidden aggregate
   return pointer, when present).  */
21175   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
21178 /* Determine whether x86_output_mi_thunk can succeed.  */
/* NOTE(review): return statements and braces are elided; the visible
   conditions describe when the 32-bit thunk generator has a scratch
   register available.  */
21181 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
21182 			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21183 			 HOST_WIDE_INT vcall_offset, tree function)
21185   /* 64-bit can handle anything.  */
21189   /* For 32-bit, everything's fine if we have one free register.  */
21190   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21193   /* Need a free register for vcall_offset.  */
21197   /* Need a free register for GOT references.  */
21198   if (flag_pic && !(*targetm.binds_local_p) (function))
21201   /* Otherwise ok.  */
21205 /* Output the assembler code for a thunk function.  THUNK_DECL is the
21206    declaration for the thunk function itself, FUNCTION is the decl for
21207    the target function.  DELTA is an immediate constant offset to be
21208    added to THIS.  If VCALL_OFFSET is nonzero, the word at
21209    *(*this + vcall_offset) should be added to THIS.  */
/* NOTE(review): many interior lines (declarations of xops/tmp, braces,
   TARGET_64BIT tests) are elided in this extract.  */
21212 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21213 		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21214 		     HOST_WIDE_INT vcall_offset, tree function)
21217   rtx this_param = x86_this_parameter (function);
21220   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
21221      pull it in now and let DELTA benefit.  */
21222   if (REG_P (this_param))
21223     this_reg = this_param;
21224   else if (vcall_offset)
21226       /* Put the this parameter into %eax.  */
21227       xops[0] = this_param;
21228       xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21229       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21232     this_reg = NULL_RTX;
21234   /* Adjust the this parameter by a fixed constant.  */
21237       xops[0] = GEN_INT (delta);
21238       xops[1] = this_reg ? this_reg : this_param;
/* 64-bit DELTA outside the signed-32 immediate range must go through
   a scratch register (%r10 is free at thunk entry).  */
21241 	  if (!x86_64_general_operand (xops[0], DImode))
21243 	      tmp = gen_rtx_REG (DImode, R10_REG);
21245 	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21247 	      xops[1] = this_param;
21249 	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21252 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21255   /* Adjust the this parameter by a value stored in the vtable.  */
/* Pick a scratch register for the vtable pointer: %r10 on 64-bit;
   on 32-bit, %ecx unless fastcall already uses it (then %eax).  */
21259 	tmp = gen_rtx_REG (DImode, R10_REG);
21262 	  int tmp_regno = 2 /* ECX */;
21263 	  if (lookup_attribute ("fastcall",
21264 				TYPE_ATTRIBUTES (TREE_TYPE (function))))
21265 	    tmp_regno = 0 /* EAX */;
21266 	  tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer *this into TMP.  */
21269       xops[0] = gen_rtx_MEM (Pmode, this_reg);
21272 	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21274 	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21276       /* Adjust the this parameter.  */
21277       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21278       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
/* VCALL_OFFSET too large for a displacement: compute the address in
   %r11 (also free at thunk entry).  */
21280 	  rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21281 	  xops[0] = GEN_INT (vcall_offset);
21283 	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21284 	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21286       xops[1] = this_reg;
21288 	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21290 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21293   /* If necessary, drop THIS back to its stack slot.  */
21294   if (this_reg && this_reg != this_param)
21296       xops[0] = this_reg;
21297       xops[1] = this_param;
21298       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the target function.  */
21301   xops[0] = XEXP (DECL_RTL (function), 0);
21304       if (!flag_pic || (*targetm.binds_local_p) (function))
21305 	output_asm_insn ("jmp\t%P0", xops);
21306       /* All thunks should be in the same object as their target,
21307 	 and thus binds_local_p should be true.  */
21308       else if (TARGET_64BIT_MS_ABI)
21309 	gcc_unreachable ();
/* 64-bit PIC: indirect jump through the GOT entry.  */
21312 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21313 	  tmp = gen_rtx_CONST (Pmode, tmp);
21314 	  tmp = gen_rtx_MEM (QImode, tmp);
21316 	  output_asm_insn ("jmp\t%A0", xops);
21321       if (!flag_pic || (*targetm.binds_local_p) (function))
21322 	output_asm_insn ("jmp\t%P0", xops);
/* 32-bit Darwin PIC: jump through the Mach-O symbol stub.  */
21327 	  rtx sym_ref = XEXP (DECL_RTL (function), 0);
21328 	  tmp = (gen_rtx_SYMBOL_REF
21330 		  machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21331 	  tmp = gen_rtx_MEM (QImode, tmp);
21333 	  output_asm_insn ("jmp\t%0", xops);
21336 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT pointer in %ecx, then jump via GOT.  */
21338 	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21339 	  output_set_got (tmp, NULL_RTX);
21342 	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21343 	  output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit the standard file prologue plus the
   x86-specific .version/.global __fltused/.intel_syntax directives when
   the target configuration requests them.
   NOTE(review): return type, braces and the TARGET_MACHO guard around
   darwin_file_start are elided in this extract.  */
21349 x86_file_start (void)
21351   default_file_start ();
21353   darwin_file_start ();
21355   if (X86_FILE_START_VERSION_DIRECTIVE)
21356     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21357   if (X86_FILE_START_FLTUSED)
21358     fputs ("\t.global\t__fltused\n", asm_out_file);
21359   if (ix86_asm_dialect == ASM_INTEL)
21360     fputs ("\t.intel_syntax\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit without -malign-double, cap the
   alignment of double/integer-class fields at 32 bits (classic i386
   struct layout).  64-bit and -malign-double keep COMPUTED unchanged.
   NOTE(review): return type, braces and the final return of COMPUTED
   are elided in this extract.  */
21364 x86_field_alignment (tree field, int computed)
21366   enum machine_mode mode;
21367   tree type = TREE_TYPE (field);
21369   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, classify by the innermost element type.  */
21371   mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21372 		    ? get_inner_array_type (type) : type);
21373   if (mode == DFmode || mode == DCmode
21374       || GET_MODE_CLASS (mode) == MODE_INT
21375       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21376     return MIN (32, computed);
21380 /* Output assembler code to FILE to increment profiler label # LABELNO
21381    for profiling a function entry.  */
/* Emits the mcount call sequence; the exact form depends on 64-bit vs
   32-bit and on PIC.  NOTE(review): the TARGET_64BIT / flag_pic branch
   structure and #else/#endif lines are elided in this extract.  */
21383 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit: counter address in %r11, then (PC-relative) call.  */
21387 #ifndef NO_PROFILE_COUNTERS
21388       fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
21391       if (!TARGET_64BIT_MS_ABI && flag_pic)
21392 	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21394 	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: counter via GOTOFF(%ebx), call via GOT.  */
21398 #ifndef NO_PROFILE_COUNTERS
21399       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21400 	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21402       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute addresses.  */
21406 #ifndef NO_PROFILE_COUNTERS
21407       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21408 	       PROFILE_COUNT_REGISTER);
21410       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21414 /* We don't have exact information about the insn sizes, but we may assume
21415    quite safely that we are informed about all 1 byte insns and memory
21416    address sizes.  This is enough to eliminate unnecessary padding in
   most cases (elided continuation of this comment).  */
/* Returns a conservative lower bound on INSN's encoded size in bytes,
   used by ix86_avoid_jump_misspredicts below.
   NOTE(review): the concrete return values for each case are elided in
   this extract; only the classifying conditions are visible.  */
21420 min_insn_size (rtx insn)
21424   if (!INSN_P (insn) || !active_insn_p (insn))
21427   /* Discard alignments we've emit and jump instructions.  */
21428   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21429       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
/* Jump tables (elided JUMP_P test on the preceding line).  */
21432       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21433 	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
21436   /* Important case - calls are always 5 bytes.
21437      It is common to have many calls in the row.  */
21439       && symbolic_reference_mentioned_p (PATTERN (insn))
21440       && !SIBLING_CALL_P (insn))
21442   if (get_attr_length (insn) <= 1)
21445   /* For normal instructions we may rely on the sizes of addresses
21446      and the presence of symbol to require 4 bytes of encoding.
21447      This is not the case for jumps where references are PC relative.  */
21448   if (!JUMP_P (insn))
21450       l = get_attr_length_address (insn);
21451       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21460 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window (elided continuation of this comment).  Insert padding so no
   16-byte window contains 4 jumps.  */
21464 ix86_avoid_jump_misspredicts (void)
21466   rtx insn, start = get_insns ();
21467   int nbytes = 0, njumps = 0;
21470   /* Look for all minimal intervals of instructions containing 4 jumps.
21471      The intervals are bounded by START and INSN.  NBYTES is the total
21472      size of instructions in the interval including INSN and not including
21473      START.  When the NBYTES is smaller than 16 bytes, it is possible
21474      that the end of START and INSN ends up in the same 16byte page.
21476      The smallest offset in the page INSN can start is the case where START
21477      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
21478      We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
   */
21480   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21483       nbytes += min_insn_size (insn);
21485 	fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21486 		INSN_UID (insn), min_insn_size (insn));
/* Count real jumps (excluding dispatch tables); elided JUMP_P test.  */
21488 	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
21489 	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front while it still holds 4 jumps or
   exceeds 16 bytes (elided while-condition).  */
21497 	  start = NEXT_INSN (start);
21498 	  if ((JUMP_P (start)
21499 	       && GET_CODE (PATTERN (start)) != ADDR_VEC
21500 	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21502 	    njumps--, isjump = 1;
21505 	  nbytes -= min_insn_size (start);
21507       gcc_assert (njumps >= 0);
21509 	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21510 		INSN_UID (start), INSN_UID (insn), nbytes);
21512       if (njumps == 3 && isjump && nbytes < 16)
21514 	  int padsize = 15 - nbytes + min_insn_size (insn);
21517 	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21518 		     INSN_UID (insn), padsize);
21519           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21524 /* AMD Athlon works faster
21525    when RET is not destination of conditional jump or directly preceded
21526    by other jump instruction.  We avoid the penalty by inserting NOP just
21527    before the RET instructions in such cases.  */
/* Walks predecessors of the exit block; when a hot RET needs padding,
   REPLACE is set and the RET is rewritten (elided) using
   gen_return_internal_long.  NOTE(review): interior braces and the
   `replace = true` assignments are elided in this extract.  */
21529 ix86_pad_returns (void)
21534   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21536       basic_block bb = e->src;
21537       rtx ret = BB_END (bb);
21539       bool replace = false;
21541       if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21542 	  || !maybe_hot_bb_p (bb))
/* Find the active insn or label immediately before the RET.  */
21544       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21545 	if (active_insn_p (prev) || LABEL_P (prev))
21547       if (prev && LABEL_P (prev))
/* RET preceded by a label: pad if any predecessor edge is a taken
   branch (non-fallthru) into it.  */
21552 	  FOR_EACH_EDGE (e, ei, bb->preds)
21553 	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
21554 		&& !(e->flags & EDGE_FALLTHRU))
/* RET directly preceded by a conditional jump (elided conditions).  */
21559 	  prev = prev_active_insn (ret);
21561 	      && ((JUMP_P (prev) && any_condjump_p (prev))
21564 	  /* Empty functions get branch mispredict even when the jump destination
21565 	     is not visible to us.  */
21566 	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21571 	  emit_insn_before (gen_return_internal_long (), ret);
21577 /* Implement machine specific optimizations.  We implement padding of returns
21578    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
/* TARGET_MACHINE_DEPENDENT_REORG hook; both passes are skipped when
   optimizing for size.  NOTE(review): the function header line is
   elided in this extract.  */
21582   if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21583     ix86_pad_returns ();
21584   if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21585     ix86_avoid_jump_misspredicts ();
21588 /* Return nonzero when QImode register that must be represented via REX prefix
   is used (elided continuation of this comment).  */
/* Scans the extracted operands; a QImode access to regno >= 4 (SPL/BPL/
   SIL/DIL and above) requires a REX prefix in 64-bit mode.
   NOTE(review): return type, braces, the QImode check and the return
   statements are elided in this extract.  */
21591 x86_extended_QIreg_mentioned_p (rtx insn)
21594   extract_insn_cached (insn);
21595   for (i = 0; i < recog_data.n_operands; i++)
21596     if (REG_P (recog_data.operand[i])
21597 	&& REGNO (recog_data.operand[i]) >= 4)
21602 /* Return nonzero when P points to register encoded via REX prefix.
21603    Called via for_each_rtx.  */
/* NOTE(review): the REG_P guard and braces are elided in this extract;
   without that guard REGNO would be invalid on non-REG rtxes, so the
   elided lines presumably contain it — confirm against full source.  */
21605 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21607   unsigned int regno;
21610   regno = REGNO (*p);
21611   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21614 /* Return true when INSN mentions register that must be encoded using REX
   prefix (elided continuation of this comment).  Walks the whole pattern
   with for_each_rtx using extended_reg_mentioned_1 above.  */
21617 x86_extended_reg_mentioned_p (rtx insn)
21619   return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21622 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
21623    optabs would emit if we didn't have TFmode patterns.  */
/* Strategy: if IN >= 0 (as signed), a plain signed conversion is exact.
   Otherwise halve IN (preserving the low bit via OR so rounding is
   correct), convert, and double the result.
   NOTE(review): the assignment of OUT from operands[0] and braces are
   elided in this extract.  */
21626 x86_emit_floatuns (rtx operands[2])
21628   rtx neglab, donelab, i0, i1, f0, in, out;
21629   enum machine_mode mode, inmode;
21631   inmode = GET_MODE (operands[1]);
21632   gcc_assert (inmode == SImode || inmode == DImode);
21635   in = force_reg (inmode, operands[1]);
21636   mode = GET_MODE (out);
21637   neglab = gen_label_rtx ();
21638   donelab = gen_label_rtx ();
21639   f0 = gen_reg_rtx (mode);
21641   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
/* Non-negative path: signed conversion is already correct.  */
21643   expand_float (out, in, 0);
21645   emit_jump_insn (gen_jump (donelab));
21648   emit_label (neglab);
/* Negative path: i0 = (in >> 1) | (in & 1); out = 2 * float(i0).  */
21650   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21652   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21654   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21656   expand_float (f0, i0, 0);
21658   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21660   emit_label (donelab);
21663 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
21664    with all elements equal to VAR.  Return true if successful.  */
/* NOTE(review): the switch over MODE and its case labels are elided in
   this extract; only the bodies of selected cases are visible.  */
21667 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21668 				   rtx target, rtx val)
21670   enum machine_mode smode, wsmode, wvmode;
/* Simple case: hardware has a direct vec_duplicate pattern.  */
21685       val = force_reg (GET_MODE_INNER (mode), val);
21686       x = gen_rtx_VEC_DUPLICATE (mode, val);
21687       emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V4HI-style case via TRUNCATE of an SImode value (SSE/3DNow!-A).  */
21693       if (TARGET_SSE || TARGET_3DNOW_A)
21695 	  val = gen_lowpart (SImode, val);
21696 	  x = gen_rtx_TRUNCATE (HImode, val);
21697 	  x = gen_rtx_VEC_DUPLICATE (mode, x);
21698 	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V8HImode broadcast via punpcklwd + pshufd.  */
21720 	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
21721 	  tmp1 = gen_reg_rtx (SImode);
21722 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
21723 	  /* Insert the SImode value as low element of V4SImode vector. */
21724 	  tmp2 = gen_reg_rtx (V4SImode);
21725 	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21726 				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21727 				    CONST0_RTX (V4SImode),
21729 	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21730 	  /* Cast the V4SImode vector back to a V8HImode vector.  */
21731 	  tmp1 = gen_reg_rtx (V8HImode);
21732 	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21733 	  /* Duplicate the low short through the whole low SImode word.  */
21734 	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21735 	  /* Cast the V8HImode vector back to a V4SImode vector.  */
21736 	  tmp2 = gen_reg_rtx (V4SImode);
21737 	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21738 	  /* Replicate the low element of the V4SImode vector.  */
21739 	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21740 	  /* Cast the V2SImode back to V8HImode, and store in target.  */
21741 	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
/* V16QImode broadcast: same idea, with two punpcklbw to spread a byte
   through an SImode word before the pshufd.  */
21752 	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
21753 	  tmp1 = gen_reg_rtx (SImode);
21754 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
21755 	  /* Insert the SImode value as low element of V4SImode vector. */
21756 	  tmp2 = gen_reg_rtx (V4SImode);
21757 	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21758 				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21759 				    CONST0_RTX (V4SImode),
21761 	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21762 	  /* Cast the V4SImode vector back to a V16QImode vector.  */
21763 	  tmp1 = gen_reg_rtx (V16QImode);
21764 	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21765 	  /* Duplicate the low byte through the whole low SImode word.  */
21766 	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21767 	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21768 	  /* Cast the V16QImode vector back to a V4SImode vector.  */
21769 	  tmp2 = gen_reg_rtx (V4SImode);
21770 	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21771 	  /* Replicate the low element of the V4SImode vector.  */
21772 	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21773 	  /* Cast the V2SImode back to V16QImode, and store in target.  */
21774 	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
/* Fallback: widen the scalar by OR-ing a shifted copy, then recurse in
   the wider vector mode and bit-cast the result into TARGET.  */
21782 	  /* Replicate the value once into the next wider mode and recurse.  */
21783 	  val = convert_modes (wsmode, smode, val, true);
21784 	  x = expand_simple_binop (wsmode, ASHIFT, val,
21785 				   GEN_INT (GET_MODE_BITSIZE (smode)),
21786 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
21787 	  val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21789 	  x = gen_reg_rtx (wvmode);
21790 	  if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21791 	    gcc_unreachable ();
21792 	  emit_move_insn (target, gen_lowpart (mode, x));
21800 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21801 whose ONE_VAR element is VAR, and other elements are zero. Return true
21805 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21806 rtx target, rtx var, int one_var)
21808 enum machine_mode vsimode;
      /* Two-element case: concat VAR with a zero element.  */
21824 var = force_reg (GET_MODE_INNER (mode), var);
21825 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21826 emit_insn (gen_rtx_SET (VOIDmode, target, x));
      /* The shuffle insns below overwrite their destination, so when
	 TARGET is a hard register (or not a REG at all) work in a fresh
	 pseudo and copy out at the end.  */
21831 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21832 new_target = gen_reg_rtx (mode);
21834 new_target = target;
21835 var = force_reg (GET_MODE_INNER (mode), var);
21836 x = gen_rtx_VEC_DUPLICATE (mode, var);
      /* Merge mask const1_rtx: only element 0 comes from the broadcast,
	 the rest from the zero vector.  */
21837 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21838 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21841 /* We need to shuffle the value to the correct position, so
21842 create a new pseudo to store the intermediate result. */
21844 /* With SSE2, we can use the integer shuffle insns. */
21845 if (mode != V4SFmode && TARGET_SSE2)
21847 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21849 GEN_INT (one_var == 1 ? 0 : 1),
21850 GEN_INT (one_var == 2 ? 0 : 1),
21851 GEN_INT (one_var == 3 ? 0 : 1)));
21852 if (target != new_target)
21853 emit_move_insn (target, new_target);
21857 /* Otherwise convert the intermediate result to V4SFmode and
21858 use the SSE1 shuffle instructions. */
21859 if (mode != V4SFmode)
21861 tmp = gen_reg_rtx (V4SFmode);
21862 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21867 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21869 GEN_INT (one_var == 1 ? 0 : 1),
21870 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21871 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21873 if (mode != V4SFmode)
21874 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21875 else if (tmp != target)
21876 emit_move_insn (target, tmp);
21878 else if (target != new_target)
21879 emit_move_insn (target, new_target);
21884 vsimode = V4SImode;
21890 vsimode = V2SImode;
21896 /* Zero extend the variable element to SImode and recurse. */
21897 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21899 x = gen_reg_rtx (vsimode);
21900 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21902 gcc_unreachable ();
21904 emit_move_insn (target, gen_lowpart (mode, x));
21912 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21913 consisting of the values in VALS. It is known that all elements
21914 except ONE_VAR are constants. Return true if successful. */
21917 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21918 rtx target, rtx vals, int one_var)
21920 rtx var = XVECEXP (vals, 0, one_var);
21921 enum machine_mode wmode;
      /* CONST_VEC is VALS with the variable slot zeroed, so it can be
	 loaded whole from the constant pool and then patched below.  */
21924 const_vec = copy_rtx (vals);
21925 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21926 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21934 /* For the two element vectors, it's just as easy to use
21935 the general case. */
21951 /* There's no way to set one QImode entry easily. Combine
21952 the variable value with its adjacent constant value, and
21953 promote to an HImode set. */
      /* one_var ^ 1 is the other QImode element sharing this HImode slot.  */
21954 x = XVECEXP (vals, 0, one_var ^ 1);
      /* Odd index: VAR occupies the high byte of the HImode pair.  */
21957 var = convert_modes (HImode, QImode, var, true);
21958 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21959 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21960 x = GEN_INT (INTVAL (x) & 0xff);
      /* Even index: VAR is the low byte, the constant neighbor is shifted
	 into the high byte.  */
21964 var = convert_modes (HImode, QImode, var, true);
21965 x = gen_int_mode (INTVAL (x) << 8, HImode);
21967 if (x != const0_rtx)
21968 var = expand_simple_binop (HImode, IOR, var, x, var,
21969 1, OPTAB_LIB_WIDEN);
21971 x = gen_reg_rtx (wmode);
21972 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21973 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21975 emit_move_insn (target, gen_lowpart (mode, x));
      /* Default: load the pool constant, then overwrite the one slot.  */
21982 emit_move_insn (target, const_vec);
21983 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21987 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21988 all values variable, and none identical. */
21991 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21992 rtx target, rtx vals)
21994 enum machine_mode half_mode = GET_MODE_INNER (mode);
21995 rtx op0 = NULL, op1 = NULL;
21996 bool use_vec_concat = false;
22002 if (!mmx_ok && !TARGET_SSE)
22008 /* For the two element vectors, we always implement VEC_CONCAT. */
22009 op0 = XVECEXP (vals, 0, 0);
22010 op1 = XVECEXP (vals, 0, 1);
22011 use_vec_concat = true;
22015 half_mode = V2SFmode;
22018 half_mode = V2SImode;
22024 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
22025 Recurse to load the two halves. */
22027 op0 = gen_reg_rtx (half_mode);
22028 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
22029 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
22031 op1 = gen_reg_rtx (half_mode);
22032 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
22033 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
22035 use_vec_concat = true;
22046 gcc_unreachable ();
22049 if (use_vec_concat)
22051 if (!register_operand (op0, half_mode))
22052 op0 = force_reg (half_mode, op0);
22053 if (!register_operand (op1, half_mode))
22054 op1 = force_reg (half_mode, op1);
22056 emit_insn (gen_rtx_SET (VOIDmode, target,
22057 gen_rtx_VEC_CONCAT (mode, op0, op1)));
      /* Element-by-element fallback: pack the elements into word_mode
	 integers and assemble the vector from those words.  */
22061 int i, j, n_elts, n_words, n_elt_per_word;
22062 enum machine_mode inner_mode;
22063 rtx words[4], shift;
22065 inner_mode = GET_MODE_INNER (mode);
22066 n_elts = GET_MODE_NUNITS (mode);
22067 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22068 n_elt_per_word = n_elts / n_words;
22069 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
22071 for (i = 0; i < n_words; ++i)
22073 rtx word = NULL_RTX;
22075 for (j = 0; j < n_elt_per_word; ++j)
      /* Walk each word's elements from most significant downward, so
	 each ASHIFT/IOR step appends the next lower element.  */
22077 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
22078 elt = convert_modes (word_mode, inner_mode, elt, true);
22084 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
22085 word, 1, OPTAB_LIB_WIDEN);
22086 word = expand_simple_binop (word_mode, IOR, word, elt,
22087 word, 1, OPTAB_LIB_WIDEN);
22095 emit_move_insn (target, gen_lowpart (mode, words[0]));
22096 else if (n_words == 2)
      /* The CLOBBER tells the register allocator the full vector is
	 being built from scratch via its word-sized halves.  */
22098 rtx tmp = gen_reg_rtx (mode);
22099 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
22100 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
22101 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
22102 emit_move_insn (target, tmp);
22104 else if (n_words == 4)
22106 rtx tmp = gen_reg_rtx (V4SImode);
22107 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
22108 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
22109 emit_move_insn (target, gen_lowpart (mode, tmp));
22112 gcc_unreachable ();
22116 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22117 instructions unless MMX_OK is true. */
22120 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
22122 enum machine_mode mode = GET_MODE (target);
22123 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22124 int n_elts = GET_MODE_NUNITS (mode);
22125 int n_var = 0, one_var = -1;
22126 bool all_same = true, all_const_zero = true;
      /* Classify the initializer: count non-constant elements (ONE_VAR
	 remembers the index of the last one) and detect the all-same and
	 all-zero special cases.  */
22130 for (i = 0; i < n_elts; ++i)
22132 x = XVECEXP (vals, 0, i);
22133 if (!CONSTANT_P (x))
22134 n_var++, one_var = i;
22135 else if (x != CONST0_RTX (inner_mode))
22136 all_const_zero = false;
22137 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
22141 /* Constants are best loaded from the constant pool. */
22144 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
22148 /* If all values are identical, broadcast the value. */
22150 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
22151 XVECEXP (vals, 0, 0)))
22154 /* Values where only one field is non-constant are best loaded from
22155 the pool and overwritten via move later. */
22159 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
22160 XVECEXP (vals, 0, one_var),
22164 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
      /* All strategies declined: fall back to the fully general expander.  */
22168 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store VAL into element ELT of vector TARGET, preserving the other
   elements.  MMX-register sequences are used only when MMX_OK.  */
22172 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
22174 enum machine_mode mode = GET_MODE (target);
22175 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22176 bool use_vec_merge = false;
      /* Two-element case: extract the element we keep, then concat it
	 with VAL in the right order.  */
22185 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
22186 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
22188 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
22190 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
22191 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22197 use_vec_merge = TARGET_SSE4_1;
22205 /* For the two element vectors, we implement a VEC_CONCAT with
22206 the extraction of the other element. */
22208 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
22209 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
22212 op0 = val, op1 = tmp;
22214 op0 = tmp, op1 = val;
22216 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
22217 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22222 use_vec_merge = TARGET_SSE4_1;
22229 use_vec_merge = true;
      /* V4SF without SSE4.1: permute with unpcklps/shufps so the new
	 value lands in its slot.  Each case's comments trace the lanes.  */
22233 /* tmp = target = A B C D */
22234 tmp = copy_to_reg (target);
22235 /* target = A A B B */
22236 emit_insn (gen_sse_unpcklps (target, target, target));
22237 /* target = X A B B */
22238 ix86_expand_vector_set (false, target, val, 0);
22239 /* target = A X C D */
22240 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22241 GEN_INT (1), GEN_INT (0),
22242 GEN_INT (2+4), GEN_INT (3+4)));
22246 /* tmp = target = A B C D */
22247 tmp = copy_to_reg (target);
22248 /* tmp = X B C D */
22249 ix86_expand_vector_set (false, tmp, val, 0);
22250 /* target = A B X D */
22251 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22252 GEN_INT (0), GEN_INT (1),
22253 GEN_INT (0+4), GEN_INT (3+4)));
22257 /* tmp = target = A B C D */
22258 tmp = copy_to_reg (target);
22259 /* tmp = X B C D */
22260 ix86_expand_vector_set (false, tmp, val, 0);
22261 /* target = A B X D */
22262 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22263 GEN_INT (0), GEN_INT (1),
22264 GEN_INT (2+4), GEN_INT (0+4)));
22268 gcc_unreachable ();
22273 use_vec_merge = TARGET_SSE4_1;
22277 /* Element 0 handled by vec_merge below. */
22280 use_vec_merge = true;
22286 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22287 store into element 0, then shuffle them back. */
22291 order[0] = GEN_INT (elt);
22292 order[1] = const1_rtx;
22293 order[2] = const2_rtx;
22294 order[3] = GEN_INT (3);
22295 order[elt] = const0_rtx;
22297 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22298 order[1], order[2], order[3]));
22300 ix86_expand_vector_set (false, target, val, 0);
      /* The swap permutation is its own inverse, so the same ORDER
	 restores the original element order.  */
22302 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22303 order[1], order[2], order[3]));
22307 /* For SSE1, we have to reuse the V4SF code. */
22308 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22309 gen_lowpart (SFmode, val), elt);
22314 use_vec_merge = TARGET_SSE2;
22317 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22321 use_vec_merge = TARGET_SSE4_1;
      /* Shared VEC_MERGE tail: broadcast VAL and merge only lane ELT.  */
22331 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22332 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22333 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      /* Last resort: spill the vector to a stack slot, poke the element
	 in memory, and reload.  */
22337 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22339 emit_move_insn (mem, target);
22341 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22342 emit_move_insn (tmp, val);
22344 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  MMX-register
   sequences are used only when MMX_OK.  */
22349 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22351 enum machine_mode mode = GET_MODE (vec);
22352 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22353 bool use_vec_extr = false;
22366 use_vec_extr = true;
22370 use_vec_extr = TARGET_SSE4_1;
      /* Non-zero V4SF element without direct extraction: shuffle (or
	 unpack the high half) so the wanted element reaches lane 0.  */
22382 tmp = gen_reg_rtx (mode);
22383 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22384 GEN_INT (elt), GEN_INT (elt),
22385 GEN_INT (elt+4), GEN_INT (elt+4)));
22389 tmp = gen_reg_rtx (mode);
22390 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22394 gcc_unreachable ();
22397 use_vec_extr = true;
22402 use_vec_extr = TARGET_SSE4_1;
      /* Same idea with integer shuffles for V4SImode.  */
22416 tmp = gen_reg_rtx (mode);
22417 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22418 GEN_INT (elt), GEN_INT (elt),
22419 GEN_INT (elt), GEN_INT (elt)));
22423 tmp = gen_reg_rtx (mode);
22424 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22428 gcc_unreachable ();
22431 use_vec_extr = true;
22436 /* For SSE1, we have to reuse the V4SF code. */
22437 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22438 gen_lowpart (V4SFmode, vec), elt);
22444 use_vec_extr = TARGET_SSE2;
22447 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22451 use_vec_extr = TARGET_SSE4_1;
22455 /* ??? Could extract the appropriate HImode element and shift. */
      /* Shared VEC_SELECT tail.  */
22462 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22463 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22465 /* Let the rtl optimizers know about the zero extension performed. */
22466 if (inner_mode == QImode || inner_mode == HImode)
22468 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22469 target = gen_lowpart (SImode, target);
22472 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      /* Last resort: spill VEC to the stack and load one element back.  */
22476 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22478 emit_move_insn (mem, vec);
22480 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22481 emit_move_insn (target, tmp);
22485 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22486 pattern to reduce; DEST is the destination; IN is the input vector. */
22489 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22491 rtx tmp1, tmp2, tmp3;
22493 tmp1 = gen_reg_rtx (V4SFmode);
22494 tmp2 = gen_reg_rtx (V4SFmode);
22495 tmp3 = gen_reg_rtx (V4SFmode);
      /* movhlps folds the high half of IN onto the low half; FN then
	 combines the halves pairwise into tmp2's low two lanes.  */
22497 emit_insn (gen_sse_movhlps (tmp1, in, in));
22498 emit_insn (fn (tmp2, tmp1, in));
      /* Broadcast tmp2's lane 1 (shuffle selector 1,1,1,1) and combine
	 with lane 0 to finish the reduction in DEST's low lane.  */
22500 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22501 GEN_INT (1), GEN_INT (1),
22502 GEN_INT (1+4), GEN_INT (1+4)));
22503 emit_insn (fn (dest, tmp2, tmp3));
22506 /* Target hook for scalar_mode_supported_p. */
22508 ix86_scalar_mode_supported_p (enum machine_mode mode)
22510 if (DECIMAL_FLOAT_MODE_P (mode))
      /* TFmode (__float128) is only available in 64-bit mode.  */
22512 else if (mode == TFmode)
22513 return TARGET_64BIT;
22515 return default_scalar_mode_supported_p (mode);
22518 /* Implements target hook vector_mode_supported_p. */
22520 ix86_vector_mode_supported_p (enum machine_mode mode)
      /* Accept any vector mode that is valid for an enabled vector
	 register class (SSE, SSE2, MMX, 3DNow!).  */
22522 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22524 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22526 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22528 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22533 /* Target hook for c_mode_for_suffix. */
22534 static enum machine_mode
22535 ix86_c_mode_for_suffix (char suffix)
      /* 'q' literals need 64-bit mode, 'w' needs MMX; the returned modes
	 are on elided lines -- presumably TFmode and XFmode, TODO confirm.  */
22537 if (TARGET_64BIT && suffix == 'q')
22539 if (TARGET_MMX && suffix == 'w')
22545 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22547 We do this in the new i386 backend to maintain source compatibility
22548 with the old cc0-based compiler. */
22551 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22552 tree inputs ATTRIBUTE_UNUSED,
      /* Every asm implicitly clobbers the condition codes ("flags") and
	 the x87 status word ("fpsr").  */
22555 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22557 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22562 /* Implements target vector targetm.asm.encode_section_info. This
22563 is not used by netware. */
22565 static void ATTRIBUTE_UNUSED
22566 ix86_encode_section_info (tree decl, rtx rtl, int first)
22568 default_encode_section_info (decl, rtl, first);
      /* Mark static/external variables living in large-model data
	 sections so addressing code knows they need far addresses.  */
22570 if (TREE_CODE (decl) == VAR_DECL
22571 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22572 && ix86_in_large_data_p (decl))
22573 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22576 /* Worker function for REVERSE_CONDITION. */
22579 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
      /* FP compare modes must use the maybe-unordered reversal so that
	 NaN operands keep the correct branch semantics.  */
22581 return (mode != CCFPmode && mode != CCFPUmode
22582 ? reverse_condition (code)
22583 : reverse_condition_maybe_unordered (code));
22586 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22590 output_387_reg_move (rtx insn, rtx *operands)
22592 if (REG_P (operands[0]))
      /* Source register dies here: pop it instead of copying.  */
22594 if (REG_P (operands[1])
22595 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
      /* Popping into %st(0) itself needs the ffreep idiom.  */
22597 if (REGNO (operands[0]) == FIRST_STACK_REG)
22598 return output_387_ffreep (operands, 0);
22599 return "fstp\t%y0";
22601 if (STACK_TOP_P (operands[0]))
22602 return "fld%z1\t%y1";
22605 else if (MEM_P (operands[0]))
22607 gcc_assert (REG_P (operands[1]));
22608 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22609 return "fstp%z0\t%y0";
22612 /* There is no non-popping store to memory for XFmode.
22613 So if we need one, follow the store with a load. */
22614 if (GET_MODE (operands[0]) == XFmode)
22615 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22617 return "fst%z0\t%y0";
22624 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22625 FP status register is set. */
22628 ix86_emit_fp_unordered_jump (rtx label)
22630 rtx reg = gen_reg_rtx (HImode);
      /* Fetch the x87 status word into REG.  */
22633 emit_insn (gen_x86_fnstsw_1 (reg));
      /* With sahf available, move the status into EFLAGS and branch on
	 the unordered condition.  */
22635 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22637 emit_insn (gen_x86_sahf_1 (reg));
22639 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22640 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
      /* Otherwise test 0x04 in the status word's high byte (the C2
	 flag) directly.  */
22644 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22646 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22647 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22650 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22651 gen_rtx_LABEL_REF (VOIDmode, label),
22653 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22655 emit_jump_insn (temp);
      /* The unordered case is expected to be rare (10%).  */
22656 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22659 /* Output code to perform a log1p XFmode calculation. */
22661 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22663 rtx label1 = gen_label_rtx ();
22664 rtx label2 = gen_label_rtx ();
22666 rtx tmp = gen_reg_rtx (XFmode);
22667 rtx tmp2 = gen_reg_rtx (XFmode);
      /* Compare |op1| against 1 - sqrt(1/2) ~= 0.2928932...: fyl2xp1 is
	 only accurate for small arguments, so larger ones branch to the
	 fyl2x(1+op1) fallback -- TODO confirm threshold rationale.  */
22669 emit_insn (gen_absxf2 (tmp, op1));
22670 emit_insn (gen_cmpxf (tmp,
22671 CONST_DOUBLE_FROM_REAL_VALUE (
22672 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22674 emit_jump_insn (gen_bge (label1));
22676 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22677 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22678 emit_jump (label2);
22680 emit_label (label1);
      /* Fallback: op0 = ln2 * log2 (1 + op1) via fyl2x.  */
22681 emit_move_insn (tmp, CONST1_RTX (XFmode));
22682 emit_insn (gen_addxf3 (tmp, op1, tmp));
22683 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22684 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22686 emit_label (label2);
22689 /* Output code to perform a Newton-Rhapson approximation of a single precision
22690 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
22692 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
22694 rtx x0, x1, e0, e1, two;
22696 x0 = gen_reg_rtx (mode);
22697 e0 = gen_reg_rtx (mode);
22698 e1 = gen_reg_rtx (mode);
22699 x1 = gen_reg_rtx (mode);
22701 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
22703 if (VECTOR_MODE_P (mode))
22704 two = ix86_build_const_vector (SFmode, true, two);
22706 two = force_reg (mode, two);
22708 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22710 /* x0 = 1./b estimate */
22711 emit_insn (gen_rtx_SET (VOIDmode, x0,
22712 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
      /* e0 = b * x0; e1 = 2 - e0 is the Newton correction factor.  */
22715 emit_insn (gen_rtx_SET (VOIDmode, e0,
22716 gen_rtx_MULT (mode, x0, b)));
22718 emit_insn (gen_rtx_SET (VOIDmode, e1,
22719 gen_rtx_MINUS (mode, two, e0)));
      /* x1 = refined reciprocal; res = a * x1.  */
22721 emit_insn (gen_rtx_SET (VOIDmode, x1,
22722 gen_rtx_MULT (mode, x0, e1)));
22724 emit_insn (gen_rtx_SET (VOIDmode, res,
22725 gen_rtx_MULT (mode, a, x1)));
22728 /* Output code to perform a Newton-Rhapson approximation of a
22729 single precision floating point [reciprocal] square root. */
22731 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
22734 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
22736 x0 = gen_reg_rtx (mode);
22737 e0 = gen_reg_rtx (mode);
22738 e1 = gen_reg_rtx (mode);
22739 e2 = gen_reg_rtx (mode);
22740 e3 = gen_reg_rtx (mode);
22742 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
22743 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
22745 mask = gen_reg_rtx (mode);
22747 if (VECTOR_MODE_P (mode))
22749 three = ix86_build_const_vector (SFmode, true, three);
22750 half = ix86_build_const_vector (SFmode, true, half);
22753 three = force_reg (mode, three);
22754 half = force_reg (mode, half);
22756 zero = force_reg (mode, CONST0_RTX(mode));
22758 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
22759 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
22761 /* Compare a to zero. */
      /* mask = (a != 0); used below to zero the estimate for a == 0,
	 so sqrt(0) yields 0 rather than a junk value from rsqrt(0).  */
22762 emit_insn (gen_rtx_SET (VOIDmode, mask,
22763 gen_rtx_NE (mode, a, zero)));
22765 /* x0 = 1./sqrt(a) estimate */
22766 emit_insn (gen_rtx_SET (VOIDmode, x0,
22767 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
22769 /* Filter out infinity. */
22770 if (VECTOR_MODE_P (mode))
22771 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
22773 gen_lowpart (V4SFmode, x0),
22774 gen_lowpart (V4SFmode, mask))));
22776 emit_insn (gen_rtx_SET (VOIDmode, x0,
22777 gen_rtx_AND (mode, x0, mask)));
      /* e0 = x0 * a; e1 = e0 * x0; e2 = 3 - e1.  */
22780 emit_insn (gen_rtx_SET (VOIDmode, e0,
22781 gen_rtx_MULT (mode, x0, a)));
22783 emit_insn (gen_rtx_SET (VOIDmode, e1,
22784 gen_rtx_MULT (mode, e0, x0)));
22786 emit_insn (gen_rtx_SET (VOIDmode, e2,
22787 gen_rtx_MINUS (mode, three, e1)));
      /* e3 = 0.5 * x0 for rsqrt, 0.5 * e0 (== 0.5 * a * x0) for sqrt;
	 the selecting condition is on an elided line.  */
22790 emit_insn (gen_rtx_SET (VOIDmode, e3,
22791 gen_rtx_MULT (mode, half, x0)));
22794 emit_insn (gen_rtx_SET (VOIDmode, e3,
22795 gen_rtx_MULT (mode, half, e0)));
22796 /* ret = e2 * e3 */
22797 emit_insn (gen_rtx_SET (VOIDmode, res,
22798 gen_rtx_MULT (mode, e2, e3)));
22801 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22803 static void ATTRIBUTE_UNUSED
22804 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22807 /* With Binutils 2.15, the "@unwind" marker must be specified on
22808 every occurrence of the ".eh_frame" section, not just the first
      /* Emit the section directive by hand so the @unwind marker can be
	 attached; everything else goes through the ELF default.  */
22811 && strcmp (name, ".eh_frame") == 0)
22813 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22814 flags & SECTION_WRITE ? "aw" : "a");
22817 default_elf_asm_named_section (name, flags, decl);
22820 /* Return the mangling of TYPE if it is an extended fundamental type. */
22822 static const char *
22823 ix86_mangle_type (tree type)
22825 type = TYPE_MAIN_VARIANT (type);
      /* Only fundamental scalar types can have a special mangling.  */
22827 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
22828 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
      /* Dispatch on the machine mode: "g" for __float128, "e" for
	 long double / __float80 (case labels elided in this view).  */
22831 switch (TYPE_MODE (type))
22834 /* __float128 is "g". */
22837 /* "long double" or __float80 is "e". */
22844 /* For 32-bit code we can save PIC register setup by using
22845 __stack_chk_fail_local hidden function instead of calling
22846 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
22847 register, so it is better to call __stack_chk_fail directly. */
22850 ix86_stack_protect_fail (void)
      /* See the header comment: hidden local variant only for 32-bit.  */
22852 return TARGET_64BIT
22853 ? default_external_stack_protect_fail ()
22854 : default_hidden_stack_protect_fail ();
22857 /* Select a format to encode pointers in exception handling data. CODE
22858 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22859 true if the symbol may be affected by dynamic relocations.
22861 ??? All x86 object file formats are capable of representing this.
22862 After all, the relocation needed is the same as for the call insn.
22863 Whether or not a particular assembler allows us to enter such, I
22864 guess we'll have to see. */
22866 asm_preferred_eh_data_format (int code, int global)
      /* PIC (64-bit branch): default to 8-byte pc-relative data, but
	 small/medium code models can use the 4-byte form.  */
22870 int type = DW_EH_PE_sdata8;
22872 || ix86_cmodel == CM_SMALL_PIC
22873 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22874 type = DW_EH_PE_sdata4;
22875 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
      /* Non-PIC: absolute pointers, shrunk to udata4 when the code model
	 guarantees they fit.  */
22877 if (ix86_cmodel == CM_SMALL
22878 || (ix86_cmodel == CM_MEDIUM && code))
22879 return DW_EH_PE_udata4;
22880 return DW_EH_PE_absptr;
22883 /* Expand copysign from SIGN to the positive value ABS_VALUE
22884 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
22887 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
22889 enum machine_mode mode = GET_MODE (sign);
22890 rtx sgn = gen_reg_rtx (mode);
22891 if (mask == NULL_RTX)
22893 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
22894 if (!VECTOR_MODE_P (mode))
22896 /* We need to generate a scalar mode mask in this case. */
22897 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22898 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22899 mask = gen_reg_rtx (mode);
22900 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
      /* sgn = sign bit taken from SIGN; result = abs_value | sgn.  The
	 NOT here compensates for the caller-provided mask polarity --
	 TODO confirm against the elided else-branch.  */
22904 mask = gen_rtx_NOT (mode, mask);
22905 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22906 gen_rtx_AND (mode, mask, sign)));
22907 emit_insn (gen_rtx_SET (VOIDmode, result,
22908 gen_rtx_IOR (mode, abs_value, sgn)));
22911 /* Expand fabs (OP0) and return a new rtx that holds the result. The
22912 mask for masking out the sign-bit is stored in *SMASK, if that is
22915 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22917 enum machine_mode mode = GET_MODE (op0);
22920 xa = gen_reg_rtx (mode);
      /* Presumably the 'true' argument builds the inverted (~signbit)
	 mask so the AND below clears the sign bit -- TODO confirm
	 ix86_build_signbit_mask's third parameter.  */
22921 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22922 if (!VECTOR_MODE_P (mode))
22924 /* We need to generate a scalar mode mask in this case. */
22925 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22926 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22927 mask = gen_reg_rtx (mode);
22928 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22930 emit_insn (gen_rtx_SET (VOIDmode, xa,
22931 gen_rtx_AND (mode, op0, mask)));
22939 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22940 swapping the operands if SWAP_OPERANDS is true. The expanded
22941 code is a forward jump to a newly created label in case the
22942 comparison is true. The generated label rtx is returned. */
22944 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22945 bool swap_operands)
22956 label = gen_label_rtx ();
      /* CCFPUmode: an unordered-aware FP compare feeding the branch.  */
22957 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22958 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22959 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22960 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22961 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22962 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22963 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      /* Record the jump target so later passes see the CFG edge.  */
22964 JUMP_LABEL (tmp) = label;
22969 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22970 using comparison code CODE. Operands are swapped for the comparison if
22971 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22973 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22974 bool swap_operands)
22976 enum machine_mode mode = GET_MODE (op0);
22977 rtx mask = gen_reg_rtx (mode);
      /* cmpsd/cmpss style compares: the destination receives a bit mask
	 (all-ones on true) usable with AND/OR below the caller.  */
22986 if (mode == DFmode)
22987 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22988 gen_rtx_fmt_ee (code, mode, op0, op1)));
22990 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22991 gen_rtx_fmt_ee (code, mode, op0, op1)));
22996 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22997 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
22999 ix86_gen_TWO52 (enum machine_mode mode)
23001 REAL_VALUE_TYPE TWO52r;
      /* 52 and 23 are the mantissa widths of DFmode and SFmode.  */
23004 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
23005 TWO52 = const_double_from_real_value (TWO52r, mode);
23006 TWO52 = force_reg (mode, TWO52);
23011 /* Expand SSE sequence for computing lround from OP1 storing
23014 ix86_expand_lround (rtx op0, rtx op1)
23016 /* C code for the stuff we're doing below:
23017 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
23020 enum machine_mode mode = GET_MODE (op1);
23021 const struct real_format *fmt;
23022 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23025 /* load nextafter (0.5, 0.0) */
      /* pred_half = 0.5 - 2**(-p-1), the largest value below 0.5, so
	 adding it never rounds a .5 boundary the wrong way.  */
23026 fmt = REAL_MODE_FORMAT (mode);
23027 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
23028 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23030 /* adj = copysign (0.5, op1) */
23031 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
23032 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
23034 /* adj = op1 + adj */
23035 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
23037 /* op0 = (imode)adj */
23038 expand_fix (op0, adj, 0);
23041 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
23044 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
23046 /* C code for the stuff we're doing below (for do_floor):
23048 xi -= (double)xi > op1 ? 1 : 0;
23051 enum machine_mode fmode = GET_MODE (op1);
23052 enum machine_mode imode = GET_MODE (op0);
23053 rtx ireg, freg, label, tmp;
23055 /* reg = (long)op1 */
23056 ireg = gen_reg_rtx (imode);
23057 expand_fix (ireg, op1, 0);
23059 /* freg = (double)reg */
23060 freg = gen_reg_rtx (fmode);
23061 expand_float (freg, ireg, 0);
23063 /* ireg = (freg > op1) ? ireg - 1 : ireg */
      /* The UNLE jump skips the +-1 correction when no adjustment is
	 needed; operands are swapped for the ceil direction.  */
23064 label = ix86_expand_sse_compare_and_jump (UNLE,
23065 freg, op1, !do_floor);
23066 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
23067 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
23068 emit_move_insn (ireg, tmp);
23070 emit_label (label);
23071 LABEL_NUSES (label) = 1;
23073 emit_move_insn (op0, ireg);
23076 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
23077 result in OPERAND0. */
23079 ix86_expand_rint (rtx operand0, rtx operand1)
23081 /* C code for the stuff we're doing below:
23082 xa = fabs (operand1);
23083 if (!isless (xa, 2**52))
23085 xa = xa + 2**52 - 2**52;
23086 return copysign (xa, operand1);
23088 enum machine_mode mode = GET_MODE (operand0);
23089 rtx res, xa, label, TWO52, mask;
23091 res = gen_reg_rtx (mode);
23092 emit_move_insn (res, operand1);
23094 /* xa = abs (operand1) */
23095 xa = ix86_expand_sse_fabs (res, &mask);
23097 /* if (!isless (xa, TWO52)) goto label; */
      /* Values >= 2**52 have no fractional bits; skip the rounding.  */
23098 TWO52 = ix86_gen_TWO52 (mode);
23099 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
      /* Adding then subtracting 2**52 forces rounding at the unit bit.  */
23101 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23102 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23104 ix86_sse_copysign_to_positive (res, xa, res, mask);
23106 emit_label (label);
23107 LABEL_NUSES (label) = 1;
23109 emit_move_insn (operand0, res);
23112 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23115 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
23117 /* C code for the stuff we expand below.
23118 double xa = fabs (x), x2;
23119 if (!isless (xa, TWO52))
23121 xa = xa + TWO52 - TWO52;
23122 x2 = copysign (xa, x);
23131 enum machine_mode mode = GET_MODE (operand0);
23132 rtx xa, TWO52, tmp, label, one, res, mask;
23134 TWO52 = ix86_gen_TWO52 (mode);
23136 /* Temporary for holding the result, initialized to the input
23137 operand to ease control flow. */
23138 res = gen_reg_rtx (mode);
23139 emit_move_insn (res, operand1);
23141 /* xa = abs (operand1) */
23142 xa = ix86_expand_sse_fabs (res, &mask);
23144 /* if (!isless (xa, TWO52)) goto label; */
23145 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23147 /* xa = xa + TWO52 - TWO52; */
      /* Round to nearest integer by pushing the value past the mantissa.  */
23148 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23149 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23151 /* xa = copysign (xa, operand1) */
23152 ix86_sse_copysign_to_positive (xa, xa, res, mask);
23154 /* generate 1.0 or -1.0 */
23155 one = force_reg (mode,
23156 const_double_from_real_value (do_floor
23157 ? dconst1 : dconstm1, mode));
23159 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
      /* The compare mask ANDed with +-1.0 yields the correction term.  */
23160 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23161 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23162 gen_rtx_AND (mode, one, tmp)));
23163 /* We always need to subtract here to preserve signed zero. */
23164 tmp = expand_simple_binop (mode, MINUS,
23165 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23166 emit_move_insn (res, tmp);
23168 emit_label (label);
23169 LABEL_NUSES (label) = 1;
23171 emit_move_insn (operand0, res);
23174 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23177 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
23179 /* C code for the stuff we expand below.
23180 double xa = fabs (x), x2;
23181 if (!isless (xa, TWO52))
23183 x2 = (double)(long)x;
23190 if (HONOR_SIGNED_ZEROS (mode))
23191 return copysign (x2, x);
23194 enum machine_mode mode = GET_MODE (operand0);
23195 rtx xa, xi, TWO52, tmp, label, one, res, mask;
23197 TWO52 = ix86_gen_TWO52 (mode);
23199 /* Temporary for holding the result, initialized to the input
23200 operand to ease control flow. */
23201 res = gen_reg_rtx (mode);
23202 emit_move_insn (res, operand1);
23204 /* xa = abs (operand1) */
23205 xa = ix86_expand_sse_fabs (res, &mask);
23207 /* if (!isless (xa, TWO52)) goto label; */
23208 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23210 /* xa = (double)(long)x */
23211 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23212 expand_fix (xi, res, 0);
23213 expand_float (xa, xi, 0);
23216 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23218 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23219 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23220 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23221 gen_rtx_AND (mode, one, tmp)));
23222 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
23223 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23224 emit_move_insn (res, tmp);
23226 if (HONOR_SIGNED_ZEROS (mode))
23227 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23229 emit_label (label);
23230 LABEL_NUSES (label) = 1;
23232 emit_move_insn (operand0, res);
23235 /* Expand SSE sequence for computing round from OPERAND1 storing
23236 into OPERAND0. Sequence that works without relying on DImode truncation
23237 via cvttsd2siq that is only available on 64bit targets. */
23239 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
23241 /* C code for the stuff we expand below.
23242 double xa = fabs (x), xa2, x2;
23243 if (!isless (xa, TWO52))
23245 Using the absolute value and copying back sign makes
23246 -0.0 -> -0.0 correct.
23247 xa2 = xa + TWO52 - TWO52;
23252 else if (dxa > 0.5)
23254 x2 = copysign (xa2, x);
23257 enum machine_mode mode = GET_MODE (operand0);
23258 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
23260 TWO52 = ix86_gen_TWO52 (mode);
23262 /* Temporary for holding the result, initialized to the input
23263 operand to ease control flow. */
23264 res = gen_reg_rtx (mode);
23265 emit_move_insn (res, operand1);
23267 /* xa = abs (operand1) */
23268 xa = ix86_expand_sse_fabs (res, &mask);
23270 /* if (!isless (xa, TWO52)) goto label; */
23271 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23273 /* xa2 = xa + TWO52 - TWO52; */
23274 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23275 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
23277 /* dxa = xa2 - xa; */
23278 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
23280 /* generate 0.5, 1.0 and -0.5 */
23281 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
23282 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
23283 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
23287 tmp = gen_reg_rtx (mode);
23288 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23289 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
23290 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23291 gen_rtx_AND (mode, one, tmp)));
23292 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23293 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23294 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
23295 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23296 gen_rtx_AND (mode, one, tmp)));
23297 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23299 /* res = copysign (xa2, operand1) */
23300 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
23302 emit_label (label);
23303 LABEL_NUSES (label) = 1;
23305 emit_move_insn (operand0, res);
23308 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23311 ix86_expand_trunc (rtx operand0, rtx operand1)
23313 /* C code for SSE variant we expand below.
23314 double xa = fabs (x), x2;
23315 if (!isless (xa, TWO52))
23317 x2 = (double)(long)x;
23318 if (HONOR_SIGNED_ZEROS (mode))
23319 return copysign (x2, x);
23322 enum machine_mode mode = GET_MODE (operand0);
23323 rtx xa, xi, TWO52, label, res, mask;
23325 TWO52 = ix86_gen_TWO52 (mode);
23327 /* Temporary for holding the result, initialized to the input
23328 operand to ease control flow. */
23329 res = gen_reg_rtx (mode);
23330 emit_move_insn (res, operand1);
23332 /* xa = abs (operand1) */
23333 xa = ix86_expand_sse_fabs (res, &mask);
23335 /* if (!isless (xa, TWO52)) goto label; */
23336 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23338 /* x = (double)(long)x */
23339 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23340 expand_fix (xi, res, 0);
23341 expand_float (res, xi, 0);
23343 if (HONOR_SIGNED_ZEROS (mode))
23344 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23346 emit_label (label);
23347 LABEL_NUSES (label) = 1;
23349 emit_move_insn (operand0, res);
23352 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23355 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
23357 enum machine_mode mode = GET_MODE (operand0);
23358 rtx xa, mask, TWO52, label, one, res, smask, tmp;
23360 /* C code for SSE variant we expand below.
23361 double xa = fabs (x), x2;
23362 if (!isless (xa, TWO52))
23364 xa2 = xa + TWO52 - TWO52;
23368 x2 = copysign (xa2, x);
23372 TWO52 = ix86_gen_TWO52 (mode);
23374 /* Temporary for holding the result, initialized to the input
23375 operand to ease control flow. */
23376 res = gen_reg_rtx (mode);
23377 emit_move_insn (res, operand1);
23379 /* xa = abs (operand1) */
23380 xa = ix86_expand_sse_fabs (res, &smask);
23382 /* if (!isless (xa, TWO52)) goto label; */
23383 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23385 /* res = xa + TWO52 - TWO52; */
23386 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23387 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23388 emit_move_insn (res, tmp);
23391 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23393 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23394 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23395 emit_insn (gen_rtx_SET (VOIDmode, mask,
23396 gen_rtx_AND (mode, mask, one)));
23397 tmp = expand_simple_binop (mode, MINUS,
23398 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23399 emit_move_insn (res, tmp);
23401 /* res = copysign (res, operand1) */
23402 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
23404 emit_label (label);
23405 LABEL_NUSES (label) = 1;
23407 emit_move_insn (operand0, res);
23410 /* Expand SSE sequence for computing round from OPERAND1 storing
23413 ix86_expand_round (rtx operand0, rtx operand1)
23415 /* C code for the stuff we're doing below:
23416 double xa = fabs (x);
23417 if (!isless (xa, TWO52))
23419 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23420 return copysign (xa, x);
23422 enum machine_mode mode = GET_MODE (operand0);
23423 rtx res, TWO52, xa, label, xi, half, mask;
23424 const struct real_format *fmt;
23425 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23427 /* Temporary for holding the result, initialized to the input
23428 operand to ease control flow. */
23429 res = gen_reg_rtx (mode);
23430 emit_move_insn (res, operand1);
23432 TWO52 = ix86_gen_TWO52 (mode);
23433 xa = ix86_expand_sse_fabs (res, &mask);
23434 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23436 /* load nextafter (0.5, 0.0) */
23437 fmt = REAL_MODE_FORMAT (mode);
23438 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
23439 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23441 /* xa = xa + 0.5 */
23442 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23443 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23445 /* xa = (double)(int64_t)xa */
23446 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23447 expand_fix (xi, xa, 0);
23448 expand_float (xa, xi, 0);
23450 /* res = copysign (xa, operand1) */
23451 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
23453 emit_label (label);
23454 LABEL_NUSES (label) = 1;
23456 emit_move_insn (operand0, res);
23460 /* Table of valid machine attributes. */
23461 static const struct attribute_spec ix86_attribute_table[] =
23463 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23464 /* Stdcall attribute says callee is responsible for popping arguments
23465 if they are not variable. */
23466 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23467 /* Fastcall attribute says callee is responsible for popping arguments
23468 if they are not variable. */
23469 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23470 /* Cdecl attribute says the callee is a normal C declaration */
23471 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23472 /* Regparm attribute specifies how many integer arguments are to be
23473 passed in registers. */
23474 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23475 /* Sseregparm attribute says we are using x86_64 calling conventions
23476 for FP arguments. */
23477 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23478 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* NOTE(review): the attribute name below is taken from the variable
   ix86_force_align_arg_pointer_string via a pointer cast — confirm that
   variable holds the attribute's name string.  */
23479 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23480 false, true, true, ix86_handle_cconv_attribute },
/* NOTE(review): the #if/#ifdef blocks below show no matching #endif in
   this excerpt — verify the conditionals are properly terminated in the
   full file.  */
23481 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23482 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23483 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23484 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
23486 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23487 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23488 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23489 SUBTARGET_ATTRIBUTE_TABLE,
/* Terminator entry: NULL name marks the end of the table.  */
23491 { NULL, 0, 0, false, false, false, NULL }
23494 /* Initialize the GCC target structure. */
/* Each hook macro is #undef'd (clearing the default from target-def.h)
   and redefined to the i386 implementation; TARGET_INITIALIZER at the
   bottom collects them all into the targetm vector.
   NOTE(review): several conditional blocks (#if/#ifndef/#ifdef) in this
   excerpt show no matching #else/#endif — verify against the full file.  */
23495 #undef TARGET_ATTRIBUTE_TABLE
23496 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23497 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23498 # undef TARGET_MERGE_DECL_ATTRIBUTES
23499 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23502 #undef TARGET_COMP_TYPE_ATTRIBUTES
23503 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23505 #undef TARGET_INIT_BUILTINS
23506 #define TARGET_INIT_BUILTINS ix86_init_builtins
23507 #undef TARGET_EXPAND_BUILTIN
23508 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23510 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23511 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23512 ix86_builtin_vectorized_function
23514 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23515 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
23517 #undef TARGET_BUILTIN_RECIPROCAL
23518 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23520 #undef TARGET_ASM_FUNCTION_EPILOGUE
23521 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* Subtargets may override section-info encoding; the #ifndef picks the
   generic ix86 hook only when no subtarget macro is defined.  */
23523 #undef TARGET_ENCODE_SECTION_INFO
23524 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23525 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23527 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23530 #undef TARGET_ASM_OPEN_PAREN
23531 #define TARGET_ASM_OPEN_PAREN ""
23532 #undef TARGET_ASM_CLOSE_PAREN
23533 #define TARGET_ASM_CLOSE_PAREN ""
23535 #undef TARGET_ASM_ALIGNED_HI_OP
23536 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23537 #undef TARGET_ASM_ALIGNED_SI_OP
23538 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23540 #undef TARGET_ASM_ALIGNED_DI_OP
23541 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* Unaligned ops fall back to the aligned ones on x86.  */
23544 #undef TARGET_ASM_UNALIGNED_HI_OP
23545 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23546 #undef TARGET_ASM_UNALIGNED_SI_OP
23547 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23548 #undef TARGET_ASM_UNALIGNED_DI_OP
23549 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23551 #undef TARGET_SCHED_ADJUST_COST
23552 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23553 #undef TARGET_SCHED_ISSUE_RATE
23554 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23555 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23556 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23557 ia32_multipass_dfa_lookahead
23559 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23560 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23563 #undef TARGET_HAVE_TLS
23564 #define TARGET_HAVE_TLS true
23566 #undef TARGET_CANNOT_FORCE_CONST_MEM
23567 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23568 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23569 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
23571 #undef TARGET_DELEGITIMIZE_ADDRESS
23572 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23574 #undef TARGET_MS_BITFIELD_LAYOUT_P
23575 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): darwin_binds_local_p is presumably guarded by a
   TARGET_MACHO conditional in the full file — confirm.  */
23578 #undef TARGET_BINDS_LOCAL_P
23579 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23581 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23582 #undef TARGET_BINDS_LOCAL_P
23583 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23586 #undef TARGET_ASM_OUTPUT_MI_THUNK
23587 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23588 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23589 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23591 #undef TARGET_ASM_FILE_START
23592 #define TARGET_ASM_FILE_START x86_file_start
23594 #undef TARGET_DEFAULT_TARGET_FLAGS
23595 #define TARGET_DEFAULT_TARGET_FLAGS \
23597 | TARGET_SUBTARGET_DEFAULT \
23598 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23600 #undef TARGET_HANDLE_OPTION
23601 #define TARGET_HANDLE_OPTION ix86_handle_option
23603 #undef TARGET_RTX_COSTS
23604 #define TARGET_RTX_COSTS ix86_rtx_costs
23605 #undef TARGET_ADDRESS_COST
23606 #define TARGET_ADDRESS_COST ix86_address_cost
23608 #undef TARGET_FIXED_CONDITION_CODE_REGS
23609 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23610 #undef TARGET_CC_MODES_COMPATIBLE
23611 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23613 #undef TARGET_MACHINE_DEPENDENT_REORG
23614 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23616 #undef TARGET_BUILD_BUILTIN_VA_LIST
23617 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23619 #undef TARGET_MD_ASM_CLOBBERS
23620 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23622 #undef TARGET_PROMOTE_PROTOTYPES
23623 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
23624 #undef TARGET_STRUCT_VALUE_RTX
23625 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23626 #undef TARGET_SETUP_INCOMING_VARARGS
23627 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23628 #undef TARGET_MUST_PASS_IN_STACK
23629 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23630 #undef TARGET_PASS_BY_REFERENCE
23631 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23632 #undef TARGET_INTERNAL_ARG_POINTER
23633 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23634 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23635 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23636 #undef TARGET_STRICT_ARGUMENT_NAMING
23637 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23639 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23640 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23642 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23643 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23645 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23646 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23648 #undef TARGET_C_MODE_FOR_SUFFIX
23649 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23652 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23653 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23656 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23657 #undef TARGET_INSERT_ATTRIBUTES
23658 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23661 #undef TARGET_MANGLE_TYPE
23662 #define TARGET_MANGLE_TYPE ix86_mangle_type
23664 #undef TARGET_STACK_PROTECT_FAIL
23665 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23667 #undef TARGET_FUNCTION_VALUE
23668 #define TARGET_FUNCTION_VALUE ix86_function_value
/* Instantiate the target hook vector from the macros defined above.  */
23670 struct gcc_target targetm = TARGET_INITIALIZER;
/* Generated header included at end of file.  */
23672 #include "gt-i386.h"