1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
51 #include "tree-gimple.h"
54 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs size_cost = { /* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of loading integer registers */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned char ix86_tune_features[X86_TUNE_LAST];
1215 /* Feature tests against the various tunings used to create ix86_tune_features
1216 based on the processor mask. */
1217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1218 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1219 negatively, so enabling for Generic64 seems like good code size
1220 tradeoff. We can't enable it for 32bit generic because it does not
1221 work well with PPro base chips. */
1222 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1224 /* X86_TUNE_PUSH_MEMORY */
1225 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1226 | m_NOCONA | m_CORE2 | m_GENERIC,
1228 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231 /* X86_TUNE_USE_BIT_TEST */
1234 /* X86_TUNE_UNROLL_STRLEN */
1235 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1237 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1238 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1240 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1241 on simulation result. But after P4 was made, no performance benefit
1242 was observed with branch hints. It also increases the code size.
1243 As a result, icc never generates branch hints. */
1246 /* X86_TUNE_DOUBLE_WITH_ADD */
1249 /* X86_TUNE_USE_SAHF */
1250 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC,
1253 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1254 partial dependencies. */
1255 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1256 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1258 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1259 register stalls on Generic32 compilation setting as well. However
1260 in current implementation the partial register stalls are not eliminated
1261 very well - they can be introduced via subregs synthesized by combine
1262 and can happen in caller/callee saving sequences. Because this option
1263 pays back little on PPro based chips and is in conflict with partial reg
1264 dependencies used by Athlon/P4 based chips, it is better to leave it off
1265 for generic32 for now. */
1268 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1269 m_CORE2 | m_GENERIC,
1271 /* X86_TUNE_USE_HIMODE_FIOP */
1272 m_386 | m_486 | m_K6_GEODE,
1274 /* X86_TUNE_USE_SIMODE_FIOP */
1275 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1277 /* X86_TUNE_USE_MOV0 */
1280 /* X86_TUNE_USE_CLTD */
1281 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1283 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1286 /* X86_TUNE_SPLIT_LONG_MOVES */
1289 /* X86_TUNE_READ_MODIFY_WRITE */
1292 /* X86_TUNE_READ_MODIFY */
1295 /* X86_TUNE_PROMOTE_QIMODE */
1296 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1297 | m_GENERIC /* | m_PENT4 ? */,
1299 /* X86_TUNE_FAST_PREFIX */
1300 ~(m_PENT | m_486 | m_386),
1302 /* X86_TUNE_SINGLE_STRINGOP */
1303 m_386 | m_PENT4 | m_NOCONA,
1305 /* X86_TUNE_QIMODE_MATH */
1308 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1309 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1310 might be considered for Generic32 if our scheme for avoiding partial
1311 stalls was more effective. */
1314 /* X86_TUNE_PROMOTE_QI_REGS */
1317 /* X86_TUNE_PROMOTE_HI_REGS */
1320 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1321 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_ADD_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1325 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_SUB_ESP_4 */
1328 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_SUB_ESP_8 */
1331 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1332 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1335 for DFmode copies */
1336 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1337 | m_GENERIC | m_GEODE),
1339 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1340 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1342 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1343 conflict here in between PPro/Pentium4 based chips that thread 128bit
1344 SSE registers as single units versus K8 based chips that divide SSE
1345 registers to two 64bit halves. This knob promotes all store destinations
1346 to be 128bit to allow register renaming on 128bit SSE units, but usually
1347 results in one extra microop on 64bit SSE units. Experimental results
1348 shows that disabling this option on P4 brings over 20% SPECfp regression,
1349 while enabling it on K8 brings roughly 2.4% regression that can be partly
1350 masked by careful scheduling of moves. */
1351 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1353 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1356 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1357 are resolved on SSE register parts instead of whole registers, so we may
1358 maintain just lower part of scalar values in proper format leaving the
1359 upper part undefined. */
1362 /* X86_TUNE_SSE_TYPELESS_STORES */
1365 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1366 m_PPRO | m_PENT4 | m_NOCONA,
1368 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1369 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1371 /* X86_TUNE_PROLOGUE_USING_MOVE */
1372 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1374 /* X86_TUNE_EPILOGUE_USING_MOVE */
1375 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1377 /* X86_TUNE_SHIFT1 */
1380 /* X86_TUNE_USE_FFREEP */
1383 /* X86_TUNE_INTER_UNIT_MOVES */
1384 ~(m_AMD_MULTIPLE | m_GENERIC),
1386 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1389 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1390 than 4 branch instructions in the 16 byte window. */
1391 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_SCHEDULE */
1394 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1396 /* X86_TUNE_USE_BT */
1397 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1399 /* X86_TUNE_USE_INCDEC */
1400 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1402 /* X86_TUNE_PAD_RETURNS */
1403 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1405 /* X86_TUNE_EXT_80387_CONSTANTS */
1406 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1408 /* X86_TUNE_SHORTEN_X87_SSE */
1411 /* X86_TUNE_AVOID_VECTOR_DECODE */
1414 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1415 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1418 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1419 vector path on AMD machines. */
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1424 m_K8 | m_GENERIC64 | m_AMDFAM10,
1426 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1430 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1431 but one byte longer. */
1434 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1435 operand that cannot be represented using a modRM byte. The XOR
1436 replacement is long decoded, so this split helps here as well. */
1439 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1440 from integer to FP. */
1443 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1444 with a subsequent conditional jump instruction into a single
1445 compare-and-branch uop. */
1449 /* Feature tests against the various architecture variations. */
1450 unsigned char ix86_arch_features[X86_ARCH_LAST];
1452 /* Feature tests against the various architecture variations, used to create
1453 ix86_arch_features based on the processor mask. */
1454 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1455 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1456 ~(m_386 | m_486 | m_PENT | m_K6),
1458 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1461 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1464 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1467 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1471 static const unsigned int x86_accumulate_outgoing_args
1472 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1474 static const unsigned int x86_arch_always_fancy_math_387
1475 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1476 | m_NOCONA | m_CORE2 | m_GENERIC;
1478 static enum stringop_alg stringop_alg = no_stringop;
1480 /* In case the average insn count for single function invocation is
1481 lower than this constant, emit fast (but longer) prologue and
1483 #define FAST_PROLOGUE_INSN_COUNT 20
1485 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1486 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1487 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1488 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1490 /* Array of the smallest class containing reg number REGNO, indexed by
1491 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1493 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1495 /* ax, dx, cx, bx */
1496 AREG, DREG, CREG, BREG,
1497 /* si, di, bp, sp */
1498 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1500 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1501 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1504 /* flags, fpsr, fpcr, frame */
1505 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1507 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1510 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1513 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1514 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1515 /* SSE REX registers */
1516 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520 /* The "default" register map used in 32bit mode. */
1522 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1524 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1525 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1526 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1527 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1528 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1530 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1533 static int const x86_64_int_parameter_registers[6] =
1535 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1536 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1539 static int const x86_64_ms_abi_int_parameter_registers[4] =
1541 2 /*RCX*/, 1 /*RDX*/,
1542 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1545 static int const x86_64_int_return_registers[4] =
1547 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1550 /* The "default" register map used in 64bit mode. */
1551 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1553 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1554 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1555 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1556 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1557 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1558 8,9,10,11,12,13,14,15, /* extended integer registers */
1559 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1562 /* Define the register numbers to be used in Dwarf debugging information.
1563 The SVR4 reference port C compiler uses the following register numbers
1564 in its Dwarf output code:
1565 0 for %eax (gcc regno = 0)
1566 1 for %ecx (gcc regno = 2)
1567 2 for %edx (gcc regno = 1)
1568 3 for %ebx (gcc regno = 3)
1569 4 for %esp (gcc regno = 7)
1570 5 for %ebp (gcc regno = 6)
1571 6 for %esi (gcc regno = 4)
1572 7 for %edi (gcc regno = 5)
1573 The following three DWARF register numbers are never generated by
1574 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1575 believes these numbers have these meanings.
1576 8 for %eip (no gcc equivalent)
1577 9 for %eflags (gcc regno = 17)
1578 10 for %trapno (no gcc equivalent)
1579 It is not at all clear how we should number the FP stack registers
1580 for the x86 architecture. If the version of SDB on x86/svr4 were
1581 a bit less brain dead with respect to floating-point then we would
1582 have a precedent to follow with respect to DWARF register numbers
1583 for x86 FP registers, but the SDB on x86/svr4 is so completely
1584 broken with respect to FP registers that it is hardly worth thinking
1585 of it as something to strive for compatibility with.
1586 The version of x86/svr4 SDB I have at the moment does (partially)
1587 seem to believe that DWARF register number 11 is associated with
1588 the x86 register %st(0), but that's about all. Higher DWARF
1589 register numbers don't seem to be associated with anything in
1590 particular, and even for DWARF regno 11, SDB only seems to under-
1591 stand that it should say that a variable lives in %st(0) (when
1592 asked via an `=' command) if we said it was in DWARF regno 11,
1593 but SDB still prints garbage when asked for the value of the
1594 variable in question (via a `/' command).
1595 (Also note that the labels SDB prints for various FP stack regs
1596 when doing an `x' command are all wrong.)
1597 Note that these problems generally don't affect the native SVR4
1598 C compiler because it doesn't allow the use of -O with -g and
1599 because when it is *not* optimizing, it allocates a memory
1600 location for each floating-point variable, and the memory
1601 location is what gets described in the DWARF AT_location
1602 attribute for the variable in question.
1603 Regardless of the severe mental illness of the x86/svr4 SDB, we
1604 do something sensible here and we use the following DWARF
1605 register numbers. Note that these are all stack-top-relative
1607 11 for %st(0) (gcc regno = 8)
1608 12 for %st(1) (gcc regno = 9)
1609 13 for %st(2) (gcc regno = 10)
1610 14 for %st(3) (gcc regno = 11)
1611 15 for %st(4) (gcc regno = 12)
1612 16 for %st(5) (gcc regno = 13)
1613 17 for %st(6) (gcc regno = 14)
1614 18 for %st(7) (gcc regno = 15)
1616 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1618 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1619 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1620 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1627 /* Test and compare insns in i386.md store the information needed to
1628 generate branch and scc insns here. */
1630 rtx ix86_compare_op0 = NULL_RTX;
1631 rtx ix86_compare_op1 = NULL_RTX;
1632 rtx ix86_compare_emitted = NULL_RTX;
1634 /* Size of the register save area. */
1635 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
1637 /* Define the structure for the machine field in struct function. */
1639 struct stack_local_entry GTY(())
1641 unsigned short mode;
1644 struct stack_local_entry *next;
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1660 [va_arg registers] (
1661 > to_allocate <- FRAME_POINTER
1671 HOST_WIDE_INT frame;
1673 int outgoing_arguments_size;
1676 HOST_WIDE_INT to_allocate;
1677 /* The offsets relative to ARG_POINTER. */
1678 HOST_WIDE_INT frame_pointer_offset;
1679 HOST_WIDE_INT hard_frame_pointer_offset;
1680 HOST_WIDE_INT stack_pointer_offset;
1682 /* When save_regs_using_mov is set, emit prologue using
1683 move instead of push instructions. */
1684 bool save_regs_using_mov;
1687 /* Code model option. */
1688 enum cmodel ix86_cmodel;
1690 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1692 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1694 /* Which unit we are generating floating point math for. */
1695 enum fpmath_unit ix86_fpmath;
1697 /* Which cpu are we scheduling for. */
1698 enum processor_type ix86_tune;
1700 /* Which instruction set architecture to use. */
1701 enum processor_type ix86_arch;
1703 /* true if sse prefetch instruction is not NOOP. */
1704 int x86_prefetch_sse;
1706 /* ix86_regparm_string as a number */
1707 static int ix86_regparm;
1709 /* -mstackrealign option */
1710 extern int ix86_force_align_arg_pointer;
1711 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1713 static rtx (*ix86_gen_leave) (void);
1714 static rtx (*ix86_gen_pop1) (rtx);
1715 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1716 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1717 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1718 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1719 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1721 /* Preferred alignment for stack boundary in bits. */
1722 unsigned int ix86_preferred_stack_boundary;
1724 /* Values 1-5: see jump.c */
1725 int ix86_branch_cost;
1727 /* Calling abi specific va_list type nodes. */
1728 static GTY(()) tree sysv_va_list_type_node;
1729 static GTY(()) tree ms_va_list_type_node;
1731 /* Variables which are this size or smaller are put in the data/bss
1732 or ldata/lbss sections. */
1734 int ix86_section_threshold = 65536;
1736 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1737 char internal_label_prefix[16];
1738 int internal_label_prefix_len;
1740 /* Fence to use after loop using movnt. */
1743 /* Register class used for passing given 64bit part of the argument.
1744 These represent classes as documented by the PS ABI, with the exception
1745 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1746 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1748 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1749 whenever possible (upper half does contain padding). */
1750 enum x86_64_reg_class
1753 X86_64_INTEGER_CLASS,
1754 X86_64_INTEGERSI_CLASS,
1761 X86_64_COMPLEX_X87_CLASS,
1764 static const char * const x86_64_reg_class_name[] =
1766 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1767 "sseup", "x87", "x87up", "cplx87", "no"
1770 #define MAX_CLASSES 4
1772 /* Table of constants used by fldpi, fldln2, etc.... */
1773 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1774 static bool ext_80387_constants_init = 0;
1777 static struct machine_function * ix86_init_machine_status (void);
1778 static rtx ix86_function_value (const_tree, const_tree, bool);
1779 static int ix86_function_regparm (const_tree, const_tree);
1780 static void ix86_compute_frame_layout (struct ix86_frame *);
1781 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1784 enum ix86_function_specific_strings
1786 IX86_FUNCTION_SPECIFIC_ARCH,
1787 IX86_FUNCTION_SPECIFIC_TUNE,
1788 IX86_FUNCTION_SPECIFIC_FPMATH,
1789 IX86_FUNCTION_SPECIFIC_MAX
1792 static char *ix86_target_string (int, int, const char *, const char *,
1793 const char *, bool);
1794 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1795 static void ix86_function_specific_save (struct cl_target_option *);
1796 static void ix86_function_specific_restore (struct cl_target_option *);
1797 static void ix86_function_specific_print (FILE *, int,
1798 struct cl_target_option *);
1799 static bool ix86_valid_option_attribute_p (tree, tree, tree, int);
1800 static bool ix86_valid_option_attribute_inner_p (tree, char *[]);
1801 static bool ix86_can_inline_p (tree, tree);
1802 static void ix86_set_current_function (tree);
1805 /* The svr4 ABI for the i386 says that records and unions are returned
1807 #ifndef DEFAULT_PCC_STRUCT_RETURN
1808 #define DEFAULT_PCC_STRUCT_RETURN 1
1811 /* Whether -mtune= or -march= were specified */
1812 static int ix86_tune_defaulted;
1813 static int ix86_arch_specified;
1815 /* Bit flags that specify the ISA we are compiling for. */
1816 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1818 /* A mask of ix86_isa_flags that includes bit X if X
1819 was set or cleared on the command line. */
1820 static int ix86_isa_flags_explicit;
1822 /* Define a set of ISAs which are available when a given ISA is
1823 enabled. MMX and SSE ISAs are handled separately. */
1825 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1826 #define OPTION_MASK_ISA_3DNOW_SET \
1827 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1829 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1830 #define OPTION_MASK_ISA_SSE2_SET \
1831 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1832 #define OPTION_MASK_ISA_SSE3_SET \
1833 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1834 #define OPTION_MASK_ISA_SSSE3_SET \
1835 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1836 #define OPTION_MASK_ISA_SSE4_1_SET \
1837 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1838 #define OPTION_MASK_ISA_SSE4_2_SET \
1839 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1841 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1843 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1845 #define OPTION_MASK_ISA_SSE4A_SET \
1846 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1847 #define OPTION_MASK_ISA_SSE5_SET \
1848 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1850 /* AES and PCLMUL need SSE2 because they use xmm registers */
1851 #define OPTION_MASK_ISA_AES_SET \
1852 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1853 #define OPTION_MASK_ISA_PCLMUL_SET \
1854 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1856 #define OPTION_MASK_ISA_ABM_SET \
1857 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1858 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1859 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1860 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1862 /* Define a set of ISAs which aren't available when a given ISA is
1863 disabled. MMX and SSE ISAs are handled separately. */
1865 #define OPTION_MASK_ISA_MMX_UNSET \
1866 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1867 #define OPTION_MASK_ISA_3DNOW_UNSET \
1868 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1869 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1871 #define OPTION_MASK_ISA_SSE_UNSET \
1872 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1873 #define OPTION_MASK_ISA_SSE2_UNSET \
1874 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1875 #define OPTION_MASK_ISA_SSE3_UNSET \
1876 (OPTION_MASK_ISA_SSE3 \
1877 | OPTION_MASK_ISA_SSSE3_UNSET \
1878 | OPTION_MASK_ISA_SSE4A_UNSET )
1879 #define OPTION_MASK_ISA_SSSE3_UNSET \
1880 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1881 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1882 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1883 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1885 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1887 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1889 #define OPTION_MASK_ISA_SSE4A_UNSET \
1890 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1891 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1892 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1893 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1894 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1895 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1896 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1897 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
1899 /* Vectorization library interface and handlers. */
1900 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1901 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1902 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1904 /* Processor target table, indexed by processor number */
1907 const struct processor_costs *cost; /* Processor costs */
1908 const int align_loop; /* Default alignments. */
1909 const int align_loop_max_skip;
1910 const int align_jump;
1911 const int align_jump_max_skip;
1912 const int align_func;
1915 static const struct ptt processor_target_table[PROCESSOR_max] =
1917 {&i386_cost, 4, 3, 4, 3, 4},
1918 {&i486_cost, 16, 15, 16, 15, 16},
1919 {&pentium_cost, 16, 7, 16, 7, 16},
1920 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1921 {&geode_cost, 0, 0, 0, 0, 0},
1922 {&k6_cost, 32, 7, 32, 7, 32},
1923 {&athlon_cost, 16, 7, 16, 7, 16},
1924 {&pentium4_cost, 0, 0, 0, 0, 0},
1925 {&k8_cost, 16, 7, 16, 7, 16},
1926 {&nocona_cost, 0, 0, 0, 0, 0},
1927 {&core2_cost, 16, 10, 16, 10, 16},
1928 {&generic32_cost, 16, 7, 16, 7, 16},
1929 {&generic64_cost, 16, 10, 16, 10, 16},
1930 {&amdfam10_cost, 32, 24, 32, 7, 32}
1933 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1958 /* Implement TARGET_HANDLE_OPTION. */
1961 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1968 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1969 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1973 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1974 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1981 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1982 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1986 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1987 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1997 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1998 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2002 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2003 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2010 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2011 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2015 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2016 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2023 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2024 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2028 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2029 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2036 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2037 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2041 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2042 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2049 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2050 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2054 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2055 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2062 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2063 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2067 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2068 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2073 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2074 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2078 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2079 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2085 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2090 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2098 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2103 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2111 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2112 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2116 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2117 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2124 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2129 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2137 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2138 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2142 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2150 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2151 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2155 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2163 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2164 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2168 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2176 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2177 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2181 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2191 /* Return a string the documents the current -m options. The caller is
2192 responsible for freeing the string. */
2195 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2196 const char *fpmath, bool add_nl_p)
2198 struct ix86_target_opts
2200 const char *option; /* option string */
2201 int mask; /* isa mask options */
2204 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2205 preceding options while match those first. */
2206 static struct ix86_target_opts isa_opts[] =
2208 { "-m64", OPTION_MASK_ISA_64BIT },
2209 { "-msse5", OPTION_MASK_ISA_SSE5 },
2210 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2211 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2212 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2213 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2214 { "-msse3", OPTION_MASK_ISA_SSE3 },
2215 { "-msse2", OPTION_MASK_ISA_SSE2 },
2216 { "-msse", OPTION_MASK_ISA_SSE },
2217 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2218 { "-mmmx", OPTION_MASK_ISA_MMX },
2219 { "-mabm", OPTION_MASK_ISA_ABM },
2220 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2221 { "-maes", OPTION_MASK_ISA_AES },
2222 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2226 static struct ix86_target_opts flag_opts[] =
2228 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2229 { "-m80387", MASK_80387 },
2230 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2231 { "-malign-double", MASK_ALIGN_DOUBLE },
2232 { "-mcld", MASK_CLD },
2233 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2234 { "-mieee-fp", MASK_IEEE_FP },
2235 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2236 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2237 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2238 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2239 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2240 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2241 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2242 { "-mno-red-zone", MASK_NO_RED_ZONE },
2243 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2244 { "-mrecip", MASK_RECIP },
2245 { "-mrtd", MASK_RTD },
2246 { "-msseregparm", MASK_SSEREGPARM },
2247 { "-mstack-arg-probe", MASK_STACK_PROBE },
2248 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2251 const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
2252 + sizeof (flag_opts) / sizeof (flag_opts[0])
2256 char target_other[40];
2265 memset (opts, '\0', sizeof (opts));
2267 /* Add -march= option. */
2270 opts[num][0] = "-march=";
2271 opts[num++][1] = arch;
2274 /* Add -mtune= option. */
2277 opts[num][0] = "-mtune=";
2278 opts[num++][1] = tune;
2281 /* Pick out the options in isa options. */
2282 for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
2284 if ((isa & isa_opts[i].mask) != 0)
2286 opts[num++][0] = isa_opts[i].option;
2287 isa &= ~ isa_opts[i].mask;
2291 if (isa && add_nl_p)
2293 opts[num++][0] = isa_other;
2294 sprintf (isa_other, "(other isa: 0x%x)", isa);
2297 /* Add flag options. */
2298 for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
2300 if ((flags & flag_opts[i].mask) != 0)
2302 opts[num++][0] = flag_opts[i].option;
2303 flags &= ~ flag_opts[i].mask;
2307 if (flags && add_nl_p)
2309 opts[num++][0] = target_other;
2310 sprintf (target_other, "(other flags: 0x%x)", isa);
2313 /* Add -fpmath= option. */
2316 opts[num][0] = "-mfpmath=";
2317 opts[num++][1] = fpmath;
2324 gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
2326 /* Size the string. */
2328 sep_len = (add_nl_p) ? 3 : 1;
2329 for (i = 0; i < num; i++)
2332 for (j = 0; j < 2; j++)
2334 len += strlen (opts[i][j]);
2337 /* Build the string. */
2338 ret = ptr = (char *) xmalloc (len);
2341 for (i = 0; i < num; i++)
2345 for (j = 0; j < 2; j++)
2346 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2353 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2361 for (j = 0; j < 2; j++)
2364 memcpy (ptr, opts[i][j], len2[j]);
2366 line_len += len2[j];
2371 gcc_assert (ret + len >= ptr);
2376 /* Function that is callable from the debugger to print the current
2379 ix86_debug_options (void)
2381 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2382 ix86_arch_string, ix86_tune_string,
2383 ix86_fpmath_string, true);
2387 fprintf (stderr, "%s\n\n", opts);
2391 fprintf (stderr, "<no options>\n\n");
2396 /* Sometimes certain combinations of command options do not make
2397 sense on a particular target machine. You can define a macro
2398 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2399 defined, is executed once just after all the command options have
2402 Don't use this macro to turn on various extra optimizations for
2403 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2406 override_options (bool main_args_p)
2409 unsigned int ix86_arch_mask, ix86_tune_mask;
2414 /* Comes from final.c -- no real reason to change it. */
2415 #define MAX_CODE_ALIGN 16
2423 PTA_PREFETCH_SSE = 1 << 4,
2425 PTA_3DNOW_A = 1 << 6,
2429 PTA_POPCNT = 1 << 10,
2431 PTA_SSE4A = 1 << 12,
2432 PTA_NO_SAHF = 1 << 13,
2433 PTA_SSE4_1 = 1 << 14,
2434 PTA_SSE4_2 = 1 << 15,
2437 PTA_PCLMUL = 1 << 18
2442 const char *const name; /* processor name or nickname. */
2443 const enum processor_type processor;
2444 const unsigned /*enum pta_flags*/ flags;
2446 const processor_alias_table[] =
2448 {"i386", PROCESSOR_I386, 0},
2449 {"i486", PROCESSOR_I486, 0},
2450 {"i586", PROCESSOR_PENTIUM, 0},
2451 {"pentium", PROCESSOR_PENTIUM, 0},
2452 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2453 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2454 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2455 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2456 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2457 {"i686", PROCESSOR_PENTIUMPRO, 0},
2458 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2459 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2460 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2461 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2462 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2463 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2464 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2465 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2466 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2467 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2468 | PTA_CX16 | PTA_NO_SAHF)},
2469 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2470 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2473 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2474 |PTA_PREFETCH_SSE)},
2475 {"k6", PROCESSOR_K6, PTA_MMX},
2476 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2477 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2478 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2479 | PTA_PREFETCH_SSE)},
2480 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2481 | PTA_PREFETCH_SSE)},
2482 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2484 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2486 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2488 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2489 | PTA_MMX | PTA_SSE | PTA_SSE2
2491 {"k8", PROCESSOR_K8, (PTA_64BIT
2492 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2493 | PTA_SSE | PTA_SSE2
2495 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2496 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2497 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2499 {"opteron", PROCESSOR_K8, (PTA_64BIT
2500 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2501 | PTA_SSE | PTA_SSE2
2503 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2504 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2505 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2507 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2508 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2509 | PTA_SSE | PTA_SSE2
2511 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2512 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2513 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2515 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2516 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2517 | PTA_SSE | PTA_SSE2
2519 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2520 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2521 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2523 | PTA_CX16 | PTA_ABM)},
2524 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2525 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2526 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2528 | PTA_CX16 | PTA_ABM)},
2529 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2530 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2533 int const pta_size = ARRAY_SIZE (processor_alias_table);
2535 /* Set up prefix/suffix so the error messages refer to either the command
2536 line argument, or the attribute(option). */
2545 prefix = "option(\"";
2550 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2551 SUBTARGET_OVERRIDE_OPTIONS;
2554 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2555 SUBSUBTARGET_OVERRIDE_OPTIONS;
2558 /* -fPIC is the default for x86_64. */
2559 if (TARGET_MACHO && TARGET_64BIT)
2562 /* Set the default values for switches whose default depends on TARGET_64BIT
2563 in case they weren't overwritten by command line options. */
2566 /* Mach-O doesn't support omitting the frame pointer for now. */
2567 if (flag_omit_frame_pointer == 2)
2568 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2569 if (flag_asynchronous_unwind_tables == 2)
2570 flag_asynchronous_unwind_tables = 1;
2571 if (flag_pcc_struct_return == 2)
2572 flag_pcc_struct_return = 0;
2576 if (flag_omit_frame_pointer == 2)
2577 flag_omit_frame_pointer = 0;
2578 if (flag_asynchronous_unwind_tables == 2)
2579 flag_asynchronous_unwind_tables = 0;
2580 if (flag_pcc_struct_return == 2)
2581 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2584 /* Need to check -mtune=generic first. */
2585 if (ix86_tune_string)
2587 if (!strcmp (ix86_tune_string, "generic")
2588 || !strcmp (ix86_tune_string, "i686")
2589 /* As special support for cross compilers we read -mtune=native
2590 as -mtune=generic. With native compilers we won't see the
2591 -mtune=native, as it was changed by the driver. */
2592 || !strcmp (ix86_tune_string, "native"))
2595 ix86_tune_string = "generic64";
2597 ix86_tune_string = "generic32";
2599 /* If this call is for setting the option attribute, allow the
2600 generic32/generic64 that was previously set. */
2601 else if (!main_args_p
2602 && (!strcmp (ix86_tune_string, "generic32")
2603 || !strcmp (ix86_tune_string, "generic64")))
2605 else if (!strncmp (ix86_tune_string, "generic", 7))
2606 error ("bad value (%s) for %stune=%s %s",
2607 ix86_tune_string, prefix, suffix, sw);
2611 if (ix86_arch_string)
2612 ix86_tune_string = ix86_arch_string;
2613 if (!ix86_tune_string)
2615 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2616 ix86_tune_defaulted = 1;
2619 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2620 need to use a sensible tune option. */
2621 if (!strcmp (ix86_tune_string, "generic")
2622 || !strcmp (ix86_tune_string, "x86-64")
2623 || !strcmp (ix86_tune_string, "i686"))
2626 ix86_tune_string = "generic64";
2628 ix86_tune_string = "generic32";
2631 if (ix86_stringop_string)
2633 if (!strcmp (ix86_stringop_string, "rep_byte"))
2634 stringop_alg = rep_prefix_1_byte;
2635 else if (!strcmp (ix86_stringop_string, "libcall"))
2636 stringop_alg = libcall;
2637 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2638 stringop_alg = rep_prefix_4_byte;
2639 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2640 stringop_alg = rep_prefix_8_byte;
2641 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2642 stringop_alg = loop_1_byte;
2643 else if (!strcmp (ix86_stringop_string, "loop"))
2644 stringop_alg = loop;
2645 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2646 stringop_alg = unrolled_loop;
2648 error ("bad value (%s) for %sstringop-strategy=%s %s",
2649 ix86_stringop_string, prefix, suffix, sw);
2651 if (!strcmp (ix86_tune_string, "x86-64"))
2652 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2653 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2654 prefix, suffix, prefix, suffix, prefix, suffix);
2656 if (!ix86_arch_string)
2657 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2659 ix86_arch_specified = 1;
2661 if (!strcmp (ix86_arch_string, "generic"))
2662 error ("generic CPU can be used only for %stune=%s %s",
2663 prefix, suffix, sw);
2664 if (!strncmp (ix86_arch_string, "generic", 7))
2665 error ("bad value (%s) for %sarch=%s %s",
2666 ix86_arch_string, prefix, suffix, sw);
2668 if (ix86_cmodel_string != 0)
2670 if (!strcmp (ix86_cmodel_string, "small"))
2671 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2672 else if (!strcmp (ix86_cmodel_string, "medium"))
2673 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2674 else if (!strcmp (ix86_cmodel_string, "large"))
2675 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2677 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2678 else if (!strcmp (ix86_cmodel_string, "32"))
2679 ix86_cmodel = CM_32;
2680 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2681 ix86_cmodel = CM_KERNEL;
2683 error ("bad value (%s) for %scmodel=%s %s",
2684 ix86_cmodel_string, prefix, suffix, sw);
2688 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2689 use of rip-relative addressing. This eliminates fixups that
2690 would otherwise be needed if this object is to be placed in a
2691 DLL, and is essentially just as efficient as direct addressing. */
2692 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2693 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2694 else if (TARGET_64BIT)
2695 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2697 ix86_cmodel = CM_32;
2699 if (ix86_asm_string != 0)
2702 && !strcmp (ix86_asm_string, "intel"))
2703 ix86_asm_dialect = ASM_INTEL;
2704 else if (!strcmp (ix86_asm_string, "att"))
2705 ix86_asm_dialect = ASM_ATT;
2707 error ("bad value (%s) for %sasm=%s %s",
2708 ix86_asm_string, prefix, suffix, sw);
2710 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2711 error ("code model %qs not supported in the %s bit mode",
2712 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2713 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2714 sorry ("%i-bit mode not compiled in",
2715 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2717 for (i = 0; i < pta_size; i++)
2718 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2720 ix86_arch = processor_alias_table[i].processor;
2721 /* Default cpu tuning to the architecture. */
2722 ix86_tune = ix86_arch;
2724 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2725 error ("CPU you selected does not support x86-64 "
2728 if (processor_alias_table[i].flags & PTA_MMX
2729 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2730 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2731 if (processor_alias_table[i].flags & PTA_3DNOW
2732 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2733 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2734 if (processor_alias_table[i].flags & PTA_3DNOW_A
2735 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2736 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2737 if (processor_alias_table[i].flags & PTA_SSE
2738 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2739 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2740 if (processor_alias_table[i].flags & PTA_SSE2
2741 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2742 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2743 if (processor_alias_table[i].flags & PTA_SSE3
2744 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2745 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2746 if (processor_alias_table[i].flags & PTA_SSSE3
2747 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2748 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2749 if (processor_alias_table[i].flags & PTA_SSE4_1
2750 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2751 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2752 if (processor_alias_table[i].flags & PTA_SSE4_2
2753 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2754 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2755 if (processor_alias_table[i].flags & PTA_SSE4A
2756 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2757 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2758 if (processor_alias_table[i].flags & PTA_SSE5
2759 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2760 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2761 if (processor_alias_table[i].flags & PTA_ABM
2762 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2763 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2764 if (processor_alias_table[i].flags & PTA_CX16
2765 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2766 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2767 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2768 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2769 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2770 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2771 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2772 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2773 if (processor_alias_table[i].flags & PTA_AES
2774 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2775 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2776 if (processor_alias_table[i].flags & PTA_PCLMUL
2777 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2778 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2779 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2780 x86_prefetch_sse = true;
2786 error ("bad value (%s) for %sarch=%s %s",
2787 ix86_arch_string, prefix, suffix, sw);
2789 ix86_arch_mask = 1u << ix86_arch;
2790 for (i = 0; i < X86_ARCH_LAST; ++i)
2791 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2793 for (i = 0; i < pta_size; i++)
2794 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2796 ix86_tune = processor_alias_table[i].processor;
2797 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2799 if (ix86_tune_defaulted)
2801 ix86_tune_string = "x86-64";
2802 for (i = 0; i < pta_size; i++)
2803 if (! strcmp (ix86_tune_string,
2804 processor_alias_table[i].name))
2806 ix86_tune = processor_alias_table[i].processor;
2809 error ("CPU you selected does not support x86-64 "
2812 /* Intel CPUs have always interpreted SSE prefetch instructions as
2813 NOPs; so, we can enable SSE prefetch instructions even when
2814 -mtune (rather than -march) points us to a processor that has them.
2815 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2816 higher processors. */
2818 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2819 x86_prefetch_sse = true;
2823 error ("bad value (%s) for %stune=%s %s",
2824 ix86_tune_string, prefix, suffix, sw);
2826 ix86_tune_mask = 1u << ix86_tune;
2827 for (i = 0; i < X86_TUNE_LAST; ++i)
2828 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2831 ix86_cost = &size_cost;
2833 ix86_cost = processor_target_table[ix86_tune].cost;
2835 /* Arrange to set up i386_stack_locals for all functions. */
2836 init_machine_status = ix86_init_machine_status;
2838 /* Validate -mregparm= value. */
2839 if (ix86_regparm_string)
2842 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2843 i = atoi (ix86_regparm_string);
2844 if (i < 0 || i > REGPARM_MAX)
2845 error ("%sregparm=%d%s is not between 0 and %d",
2846 prefix, i, suffix, REGPARM_MAX);
2851 ix86_regparm = REGPARM_MAX;
2853 /* If the user has provided any of the -malign-* options,
2854 warn and use that value only if -falign-* is not set.
2855 Remove this code in GCC 3.2 or later. */
2856 if (ix86_align_loops_string)
2858 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2859 prefix, suffix, prefix, suffix);
2860 if (align_loops == 0)
2862 i = atoi (ix86_align_loops_string);
2863 if (i < 0 || i > MAX_CODE_ALIGN)
2864 error ("%salign-loops=%d%s is not between 0 and %d",
2865 prefix, i, suffix, MAX_CODE_ALIGN);
2867 align_loops = 1 << i;
2871 if (ix86_align_jumps_string)
2873 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2874 prefix, suffix, prefix, suffix);
2875 if (align_jumps == 0)
2877 i = atoi (ix86_align_jumps_string);
2878 if (i < 0 || i > MAX_CODE_ALIGN)
2879 error ("%salign-loops=%d%s is not between 0 and %d",
2880 prefix, i, suffix, MAX_CODE_ALIGN);
2882 align_jumps = 1 << i;
2886 if (ix86_align_funcs_string)
2888 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2889 prefix, suffix, prefix, suffix);
2890 if (align_functions == 0)
2892 i = atoi (ix86_align_funcs_string);
2893 if (i < 0 || i > MAX_CODE_ALIGN)
2894 error ("%salign-loops=%d%s is not between 0 and %d",
2895 prefix, i, suffix, MAX_CODE_ALIGN);
2897 align_functions = 1 << i;
2901 /* Default align_* from the processor table. */
2902 if (align_loops == 0)
2904 align_loops = processor_target_table[ix86_tune].align_loop;
2905 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2907 if (align_jumps == 0)
2909 align_jumps = processor_target_table[ix86_tune].align_jump;
2910 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2912 if (align_functions == 0)
2914 align_functions = processor_target_table[ix86_tune].align_func;
2917 /* Validate -mbranch-cost= value, or provide default. */
2918 ix86_branch_cost = ix86_cost->branch_cost;
2919 if (ix86_branch_cost_string)
2921 i = atoi (ix86_branch_cost_string);
2923 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2925 ix86_branch_cost = i;
2927 if (ix86_section_threshold_string)
2929 i = atoi (ix86_section_threshold_string);
2931 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2933 ix86_section_threshold = i;
2936 if (ix86_tls_dialect_string)
2938 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2939 ix86_tls_dialect = TLS_DIALECT_GNU;
2940 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2941 ix86_tls_dialect = TLS_DIALECT_GNU2;
2942 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2943 ix86_tls_dialect = TLS_DIALECT_SUN;
2945 error ("bad value (%s) for %stls-dialect=%s %s",
2946 ix86_tls_dialect_string, prefix, suffix, sw);
2949 if (ix87_precision_string)
2951 i = atoi (ix87_precision_string);
2952 if (i != 32 && i != 64 && i != 80)
2953 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2958 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2960 /* Enable by default the SSE and MMX builtins. Do allow the user to
2961 explicitly disable any of these. In particular, disabling SSE and
2962 MMX for kernel code is extremely useful. */
2963 if (!ix86_arch_specified)
2965 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2966 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2969 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
2973 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2975 if (!ix86_arch_specified)
2977 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2979 /* i386 ABI does not specify red zone. It still makes sense to use it
2980 when programmer takes care to stack from being destroyed. */
2981 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2982 target_flags |= MASK_NO_RED_ZONE;
2985 /* Keep nonleaf frame pointers. */
2986 if (flag_omit_frame_pointer)
2987 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2988 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2989 flag_omit_frame_pointer = 1;
2991 /* If we're doing fast math, we don't care about comparison order
2992 wrt NaNs. This lets us use a shorter comparison sequence. */
2993 if (flag_finite_math_only)
2994 target_flags &= ~MASK_IEEE_FP;
2996 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2997 since the insns won't need emulation. */
2998 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2999 target_flags &= ~MASK_NO_FANCY_MATH_387;
3001 /* Likewise, if the target doesn't have a 387, or we've specified
3002 software floating point, don't use 387 inline intrinsics. */
3004 target_flags |= MASK_NO_FANCY_MATH_387;
3006 /* Turn on MMX builtins for -msse. */
3009 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3010 x86_prefetch_sse = true;
3013 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3014 if (TARGET_SSE4_2 || TARGET_ABM)
3015 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3017 /* Validate -mpreferred-stack-boundary= value, or provide default.
3018 The default of 128 bits is for Pentium III's SSE __m128. We can't
3019 change it because of optimize_size. Otherwise, we can't mix object
3020 files compiled with -Os and -On. */
3021 ix86_preferred_stack_boundary = 128;
3022 if (ix86_preferred_stack_boundary_string)
3024 i = atoi (ix86_preferred_stack_boundary_string);
3025 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3026 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3027 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3029 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3032 /* Accept -msseregparm only if at least SSE support is enabled. */
3033 if (TARGET_SSEREGPARM
3035 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3037 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3038 if (ix86_fpmath_string != 0)
3040 if (! strcmp (ix86_fpmath_string, "387"))
3041 ix86_fpmath = FPMATH_387;
3042 else if (! strcmp (ix86_fpmath_string, "sse"))
3046 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3047 ix86_fpmath = FPMATH_387;
3050 ix86_fpmath = FPMATH_SSE;
3052 else if (! strcmp (ix86_fpmath_string, "387,sse")
3053 || ! strcmp (ix86_fpmath_string, "387+sse")
3054 || ! strcmp (ix86_fpmath_string, "sse,387")
3055 || ! strcmp (ix86_fpmath_string, "sse+387")
3056 || ! strcmp (ix86_fpmath_string, "both"))
3060 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3061 ix86_fpmath = FPMATH_387;
3063 else if (!TARGET_80387)
3065 warning (0, "387 instruction set disabled, using SSE arithmetics");
3066 ix86_fpmath = FPMATH_SSE;
3069 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3072 error ("bad value (%s) for %sfpmath=%s %s",
3073 ix86_fpmath_string, prefix, suffix, sw);
3076 /* If the i387 is disabled, then do not return values in it. */
3078 target_flags &= ~MASK_FLOAT_RETURNS;
3080 /* Use external vectorized library in vectorizing intrinsics. */
3081 if (ix86_veclibabi_string)
3083 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3084 ix86_veclib_handler = ix86_veclibabi_svml;
3085 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3086 ix86_veclib_handler = ix86_veclibabi_acml;
3088 error ("unknown vectorization library ABI type (%s) for "
3089 "%sveclibabi=%s %s", ix86_veclibabi_string,
3090 prefix, suffix, sw);
3093 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3094 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3096 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3098 /* ??? Unwind info is not correct around the CFG unless either a frame
3099 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3100 unwind info generation to be aware of the CFG and propagating states
3102 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3103 || flag_exceptions || flag_non_call_exceptions)
3104 && flag_omit_frame_pointer
3105 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3107 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3108 warning (0, "unwind tables currently require either a frame pointer "
3109 "or %saccumulate-outgoing-args%s for correctness",
3111 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3114 /* If stack probes are required, the space used for large function
3115 arguments on the stack must also be probed, so enable
3116 -maccumulate-outgoing-args so this happens in the prologue. */
3117 if (TARGET_STACK_PROBE
3118 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3120 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3121 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3122 "for correctness", prefix, suffix);
3123 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3128 /* For sane SSE instruction set generation we need fcomi instruction.
3129 It is safe to enable all CMOVE instructions. */
3133 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3136 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3137 p = strchr (internal_label_prefix, 'X');
3138 internal_label_prefix_len = p - internal_label_prefix;
3142 /* When scheduling description is not available, disable scheduler pass
3143 so it won't slow down the compilation and make x87 code slower. */
3144 if (!TARGET_SCHEDULE)
3145 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3147 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3148 set_param_value ("simultaneous-prefetches",
3149 ix86_cost->simultaneous_prefetches);
3150 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3151 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3152 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3153 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3154 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3155 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3157 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3158 can be optimized to ap = __builtin_next_arg (0). */
3160 targetm.expand_builtin_va_start = NULL;
3164 ix86_gen_leave = gen_leave_rex64;
3165 ix86_gen_pop1 = gen_popdi1;
3166 ix86_gen_add3 = gen_adddi3;
3167 ix86_gen_sub3 = gen_subdi3;
3168 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3169 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3170 ix86_gen_monitor = gen_sse3_monitor64;
3174 ix86_gen_leave = gen_leave;
3175 ix86_gen_pop1 = gen_popsi1;
3176 ix86_gen_add3 = gen_addsi3;
3177 ix86_gen_sub3 = gen_subsi3;
3178 ix86_gen_sub3_carry = gen_subsi3_carry;
3179 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3180 ix86_gen_monitor = gen_sse3_monitor;
3184 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3186 target_flags |= MASK_CLD & ~target_flags_explicit;
3189 /* Save the initial options in case the user does function specific options */
3191 target_option_default_node = target_option_current_node
3192 = build_target_option_node ();
3195 /* Save the current options */
3198 ix86_function_specific_save (struct cl_target_option *ptr)
3200 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3201 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3202 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3203 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3205 ptr->arch = ix86_arch;
3206 ptr->tune = ix86_tune;
3207 ptr->fpmath = ix86_fpmath;
3208 ptr->branch_cost = ix86_branch_cost;
3209 ptr->tune_defaulted = ix86_tune_defaulted;
3210 ptr->arch_specified = ix86_arch_specified;
3211 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3212 ptr->target_flags_explicit = target_flags_explicit;
3215 /* Restore the current options */
3218 ix86_function_specific_restore (struct cl_target_option *ptr)
3220 enum processor_type old_tune = ix86_tune;
3221 enum processor_type old_arch = ix86_arch;
3222 unsigned int ix86_arch_mask, ix86_tune_mask;
3225 ix86_arch = ptr->arch;
3226 ix86_tune = ptr->tune;
3227 ix86_fpmath = ptr->fpmath;
3228 ix86_branch_cost = ptr->branch_cost;
3229 ix86_tune_defaulted = ptr->tune_defaulted;
3230 ix86_arch_specified = ptr->arch_specified;
3231 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3232 target_flags_explicit = ptr->target_flags_explicit;
3234 /* Recreate the arch feature tests if the arch changed */
3235 if (old_arch != ix86_arch)
3237 ix86_arch_mask = 1u << ix86_arch;
3238 for (i = 0; i < X86_ARCH_LAST; ++i)
3239 ix86_arch_features[i]
3240 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3243 /* Recreate the tune optimization tests */
3244 if (old_tune != ix86_tune)
3246 ix86_tune_mask = 1u << ix86_tune;
3247 for (i = 0; i < X86_TUNE_LAST; ++i)
3248 ix86_tune_features[i]
3249 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3253 /* Print the current options */
3256 ix86_function_specific_print (FILE *file, int indent,
3257 struct cl_target_option *ptr)
3260 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3261 NULL, NULL, NULL, false);
3263 fprintf (file, "%*sarch = %d (%s)\n",
3266 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3267 ? cpu_names[ptr->arch]
3270 fprintf (file, "%*stune = %d (%s)\n",
3273 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3274 ? cpu_names[ptr->tune]
3277 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3278 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3279 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3280 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3284 fprintf (file, "%*s%s\n", indent, "", target_string);
3285 free (target_string);
3290 /* Inner function to process the attribute((option(...))), take an argument and
3291 set the current options from the argument. If we have a list, recursively go
3295 ix86_valid_option_attribute_inner_p (tree args, char *p_strings[])
3300 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3301 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3302 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3303 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3318 enum ix86_opt_type type;
3323 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3324 IX86_ATTR_ISA ("abm", OPT_mabm),
3325 IX86_ATTR_ISA ("aes", OPT_maes),
3326 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3327 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3328 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3329 IX86_ATTR_ISA ("sse", OPT_msse),
3330 IX86_ATTR_ISA ("sse2", OPT_msse2),
3331 IX86_ATTR_ISA ("sse3", OPT_msse3),
3332 IX86_ATTR_ISA ("sse4", OPT_msse4),
3333 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3334 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3335 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3336 IX86_ATTR_ISA ("sse5", OPT_msse5),
3337 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3339 /* string options */
3340 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3341 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3342 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3345 IX86_ATTR_YES ("cld",
3349 IX86_ATTR_NO ("fancy-math-387",
3350 OPT_mfancy_math_387,
3351 MASK_NO_FANCY_MATH_387),
3353 IX86_ATTR_NO ("fused-madd",
3355 MASK_NO_FUSED_MADD),
3357 IX86_ATTR_YES ("ieee-fp",
3361 IX86_ATTR_YES ("inline-all-stringops",
3362 OPT_minline_all_stringops,
3363 MASK_INLINE_ALL_STRINGOPS),
3365 IX86_ATTR_YES ("inline-stringops-dynamically",
3366 OPT_minline_stringops_dynamically,
3367 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3369 IX86_ATTR_NO ("align-stringops",
3370 OPT_mno_align_stringops,
3371 MASK_NO_ALIGN_STRINGOPS),
3373 IX86_ATTR_YES ("recip",
3379 /* If this is a list, recurse to get the options. */
3380 if (TREE_CODE (args) == TREE_LIST)
3384 for (; args; args = TREE_CHAIN (args))
3385 if (TREE_VALUE (args)
3386 && !ix86_valid_option_attribute_inner_p (TREE_VALUE (args), p_strings))
3392 else if (TREE_CODE (args) != STRING_CST)
3395 /* Handle multiple arguments separated by commas. */
3396 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3398 while (next_optstr && *next_optstr != '\0')
3400 char *p = next_optstr;
3402 char *comma = strchr (next_optstr, ',');
3403 const char *opt_string;
3404 size_t len, opt_len;
3409 enum ix86_opt_type type = ix86_opt_unknown;
3415 len = comma - next_optstr;
3416 next_optstr = comma + 1;
3424 /* Recognize no-xxx. */
3425 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3434 /* Find the option. */
3437 for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
3439 type = attrs[i].type;
3440 opt_len = attrs[i].len;
3441 if (ch == attrs[i].string[0]
3442 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3443 && memcmp (p, attrs[i].string, opt_len) == 0)
3446 mask = attrs[i].mask;
3447 opt_string = attrs[i].string;
3452 /* Process the option. */
3455 error ("attribute(option(\"%s\")) is unknown", orig_p);
3459 else if (type == ix86_opt_isa)
3460 ix86_handle_option (opt, p, opt_set_p);
3462 else if (type == ix86_opt_yes || type == ix86_opt_no)
3464 if (type == ix86_opt_no)
3465 opt_set_p = !opt_set_p;
3468 target_flags |= mask;
3470 target_flags &= ~mask;
3473 else if (type == ix86_opt_str)
3477 error ("option(\"%s\") was already specified", opt_string);
3481 p_strings[opt] = xstrdup (p + opt_len);
3491 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3494 ix86_valid_option_attribute_tree (tree args)
3496 const char *orig_arch_string = ix86_arch_string;
3497 const char *orig_tune_string = ix86_tune_string;
3498 const char *orig_fpmath_string = ix86_fpmath_string;
3499 int orig_tune_defaulted = ix86_tune_defaulted;
3500 int orig_arch_specified = ix86_arch_specified;
3501 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3504 struct cl_target_option *def
3505 = TREE_TARGET_OPTION (target_option_default_node);
3507 /* Process each of the options on the chain. */
3508 if (! ix86_valid_option_attribute_inner_p (args, option_strings))
3511 /* If the changed options are different from the default, rerun override_options,
3512 and then save the options away. The string options are are attribute options,
3513 and will be undone when we copy the save structure. */
3514 if (ix86_isa_flags != def->ix86_isa_flags
3515 || target_flags != def->target_flags
3516 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3517 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3518 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3520 /* If we are using the default tune= or arch=, undo the string assigned,
3521 and use the default. */
3522 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3523 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3524 else if (!orig_arch_specified)
3525 ix86_arch_string = NULL;
3527 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3528 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3529 else if (orig_tune_defaulted)
3530 ix86_tune_string = NULL;
3532 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3533 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3534 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3535 else if (!TARGET_64BIT && TARGET_SSE)
3536 ix86_fpmath_string = "sse,387";
3538 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3539 override_options (false);
3541 /* Save the current options unless we are validating options for
3543 t = build_target_option_node ();
3545 ix86_arch_string = orig_arch_string;
3546 ix86_tune_string = orig_tune_string;
3547 ix86_fpmath_string = orig_fpmath_string;
3549 /* Free up memory allocated to hold the strings */
3550 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3551 if (option_strings[i])
3552 free (option_strings[i]);
3558 /* Hook to validate attribute((option("string"))). */
3561 ix86_valid_option_attribute_p (tree fndecl,
3562 tree ARG_UNUSED (name),
3564 int ARG_UNUSED (flags))
3566 struct cl_target_option cur_opts;
3570 cl_target_option_save (&cur_opts);
3571 new_opts = ix86_valid_option_attribute_tree (args);
3576 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_opts;
3578 cl_target_option_restore (&cur_opts);
3583 /* Hook to determine if one function can safely inline another. */
3586 ix86_can_inline_p (tree caller, tree callee)
3589 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3590 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3592 /* If callee has no option attributes, then it is ok to inline. */
3596 /* If caller has no option attributes, but callee does then it is not ok to
3598 else if (!caller_tree)
3603 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3604 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3606 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function
3607 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3609 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3610 != callee_opts->ix86_isa_flags)
3613 /* See if we have the same non-isa options. */
3614 else if (caller_opts->target_flags != callee_opts->target_flags)
3617 /* See if arch, tune, etc. are the same. */
3618 else if (caller_opts->arch != callee_opts->arch)
3621 else if (caller_opts->tune != callee_opts->tune)
3624 else if (caller_opts->fpmath != callee_opts->fpmath)
3627 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3638 /* Remember the last target of ix86_set_current_function. */
3639 static GTY(()) tree ix86_previous_fndecl;
3641 /* Establish appropriate back-end context for processing the function
3642 FNDECL. The argument might be NULL to indicate processing at top
3643 level, outside of any function scope. */
3645 ix86_set_current_function (tree fndecl)
3647 /* Only change the context if the function changes. This hook is called
3648 several times in the course of compiling a function, and we don't want to
3649 slow things down too much or call target_reinit when it isn't safe. */
3650 if (fndecl && fndecl != ix86_previous_fndecl)
3652 tree old_tree = (ix86_previous_fndecl
3653 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3656 tree new_tree = (fndecl
3657 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3660 ix86_previous_fndecl = fndecl;
3661 if (old_tree == new_tree)
3666 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3672 struct cl_target_option *def
3673 = TREE_TARGET_OPTION (target_option_current_node);
3675 cl_target_option_restore (def);
3682 /* Return true if this goes in large data/bss. */
3685 ix86_in_large_data_p (tree exp)
3687 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3690 /* Functions are never large data. */
3691 if (TREE_CODE (exp) == FUNCTION_DECL)
3694 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3696 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3697 if (strcmp (section, ".ldata") == 0
3698 || strcmp (section, ".lbss") == 0)
3704 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3706 /* If this is an incomplete type with size 0, then we can't put it
3707 in data because it might be too big when completed. */
3708 if (!size || size > ix86_section_threshold)
3715 /* Switch to the appropriate section for output of DECL.
3716 DECL is either a `VAR_DECL' node or a constant of some sort.
3717 RELOC indicates whether forming the initial value of DECL requires
3718 link-time relocations. */
3720 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3724 x86_64_elf_select_section (tree decl, int reloc,
3725 unsigned HOST_WIDE_INT align)
3727 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3728 && ix86_in_large_data_p (decl))
3730 const char *sname = NULL;
3731 unsigned int flags = SECTION_WRITE;
3732 switch (categorize_decl_for_section (decl, reloc))
3737 case SECCAT_DATA_REL:
3738 sname = ".ldata.rel";
3740 case SECCAT_DATA_REL_LOCAL:
3741 sname = ".ldata.rel.local";
3743 case SECCAT_DATA_REL_RO:
3744 sname = ".ldata.rel.ro";
3746 case SECCAT_DATA_REL_RO_LOCAL:
3747 sname = ".ldata.rel.ro.local";
3751 flags |= SECTION_BSS;
3754 case SECCAT_RODATA_MERGE_STR:
3755 case SECCAT_RODATA_MERGE_STR_INIT:
3756 case SECCAT_RODATA_MERGE_CONST:
3760 case SECCAT_SRODATA:
3767 /* We don't split these for medium model. Place them into
3768 default sections and hope for best. */
3770 case SECCAT_EMUTLS_VAR:
3771 case SECCAT_EMUTLS_TMPL:
3776 /* We might get called with string constants, but get_named_section
3777 doesn't like them as they are not DECLs. Also, we need to set
3778 flags in that case. */
3780 return get_section (sname, flags, NULL);
3781 return get_named_section (decl, sname, reloc);
3784 return default_elf_select_section (decl, reloc, align);
3787 /* Build up a unique section name, expressed as a
3788 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3789 RELOC indicates whether the initial value of EXP requires
3790 link-time relocations. */
3792 static void ATTRIBUTE_UNUSED
3793 x86_64_elf_unique_section (tree decl, int reloc)
3795 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3796 && ix86_in_large_data_p (decl))
3798 const char *prefix = NULL;
3799 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3800 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3802 switch (categorize_decl_for_section (decl, reloc))
3805 case SECCAT_DATA_REL:
3806 case SECCAT_DATA_REL_LOCAL:
3807 case SECCAT_DATA_REL_RO:
3808 case SECCAT_DATA_REL_RO_LOCAL:
3809 prefix = one_only ? ".ld" : ".ldata";
3812 prefix = one_only ? ".lb" : ".lbss";
3815 case SECCAT_RODATA_MERGE_STR:
3816 case SECCAT_RODATA_MERGE_STR_INIT:
3817 case SECCAT_RODATA_MERGE_CONST:
3818 prefix = one_only ? ".lr" : ".lrodata";
3820 case SECCAT_SRODATA:
3827 /* We don't split these for medium model. Place them into
3828 default sections and hope for best. */
3830 case SECCAT_EMUTLS_VAR:
3831 prefix = targetm.emutls.var_section;
3833 case SECCAT_EMUTLS_TMPL:
3834 prefix = targetm.emutls.tmpl_section;
3839 const char *name, *linkonce;
3842 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3843 name = targetm.strip_name_encoding (name);
3845 /* If we're using one_only, then there needs to be a .gnu.linkonce
3846 prefix to the section name. */
3847 linkonce = one_only ? ".gnu.linkonce" : "";
3849 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3851 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
3855 default_unique_section (decl, reloc);
3858 #ifdef COMMON_ASM_OP
3859 /* This says how to output assembler code to declare an
3860 uninitialized external linkage data object.
3862 For medium model x86-64 we need to use .largecomm opcode for
3865 x86_elf_aligned_common (FILE *file,
3866 const char *name, unsigned HOST_WIDE_INT size,
3869 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3870 && size > (unsigned int)ix86_section_threshold)
3871 fprintf (file, ".largecomm\t");
3873 fprintf (file, "%s", COMMON_ASM_OP);
3874 assemble_name (file, name);
3875 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3876 size, align / BITS_PER_UNIT);
3880 /* Utility function for targets to use in implementing
3881 ASM_OUTPUT_ALIGNED_BSS. */
3884 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3885 const char *name, unsigned HOST_WIDE_INT size,
3888 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3889 && size > (unsigned int)ix86_section_threshold)
3890 switch_to_section (get_named_section (decl, ".lbss", 0));
3892 switch_to_section (bss_section);
3893 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3894 #ifdef ASM_DECLARE_OBJECT_NAME
3895 last_assemble_variable_decl = decl;
3896 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3898 /* Standard thing is just output label for the object. */
3899 ASM_OUTPUT_LABEL (file, name);
3900 #endif /* ASM_DECLARE_OBJECT_NAME */
3901 ASM_OUTPUT_SKIP (file, size ? size : 1);
3905 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3907 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3908 make the problem with not enough registers even worse. */
3909 #ifdef INSN_SCHEDULING
3911 flag_schedule_insns = 0;
3915 /* The Darwin libraries never set errno, so we might as well
3916 avoid calling them when that's the only reason we would. */
3917 flag_errno_math = 0;
3919 /* The default values of these switches depend on the TARGET_64BIT
3920 that is not known at this moment. Mark these values with 2 and
3921 let user the to override these. In case there is no command line option
3922 specifying them, we will set the defaults in override_options. */
3924 flag_omit_frame_pointer = 2;
3925 flag_pcc_struct_return = 2;
3926 flag_asynchronous_unwind_tables = 2;
3927 flag_vect_cost_model = 1;
3928 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3929 SUBTARGET_OPTIMIZATION_OPTIONS;
3933 /* Decide whether we can make a sibling call to a function. DECL is the
3934 declaration of the function being targeted by the call and EXP is the
3935 CALL_EXPR representing the call. */
3938 ix86_function_ok_for_sibcall (tree decl, tree exp)
3943 /* If we are generating position-independent code, we cannot sibcall
3944 optimize any indirect call, or a direct call to a global function,
3945 as the PLT requires %ebx be live. */
3946 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3953 func = TREE_TYPE (CALL_EXPR_FN (exp));
3954 if (POINTER_TYPE_P (func))
3955 func = TREE_TYPE (func);
3958 /* Check that the return value locations are the same. Like
3959 if we are returning floats on the 80387 register stack, we cannot
3960 make a sibcall from a function that doesn't return a float to a
3961 function that does or, conversely, from a function that does return
3962 a float to a function that doesn't; the necessary stack adjustment
3963 would not be executed. This is also the place we notice
3964 differences in the return value ABI. Note that it is ok for one
3965 of the functions to have void return type as long as the return
3966 value of the other is passed in a register. */
3967 a = ix86_function_value (TREE_TYPE (exp), func, false);
3968 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3970 if (STACK_REG_P (a) || STACK_REG_P (b))
3972 if (!rtx_equal_p (a, b))
3975 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3977 else if (!rtx_equal_p (a, b))
3980 /* If this call is indirect, we'll need to be able to use a call-clobbered
3981 register for the address of the target function. Make sure that all
3982 such registers are not used for passing parameters. */
3983 if (!decl && !TARGET_64BIT)
3987 /* We're looking at the CALL_EXPR, we need the type of the function. */
3988 type = CALL_EXPR_FN (exp); /* pointer expression */
3989 type = TREE_TYPE (type); /* pointer type */
3990 type = TREE_TYPE (type); /* function type */
3992 if (ix86_function_regparm (type, NULL) >= 3)
3994 /* ??? Need to count the actual number of registers to be used,
3995 not the possible number of registers. Fix later. */
4000 /* Dllimport'd functions are also called indirectly. */
4001 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4002 && decl && DECL_DLLIMPORT_P (decl)
4003 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4006 /* If we forced aligned the stack, then sibcalling would unalign the
4007 stack, which may break the called function. */
4008 if (cfun->machine->force_align_arg_pointer)
4011 /* Otherwise okay. That also includes certain types of indirect calls. */
4015 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4016 calling convention attributes;
4017 arguments as in struct attribute_spec.handler. */
4020 ix86_handle_cconv_attribute (tree *node, tree name,
4022 int flags ATTRIBUTE_UNUSED,
4025 if (TREE_CODE (*node) != FUNCTION_TYPE
4026 && TREE_CODE (*node) != METHOD_TYPE
4027 && TREE_CODE (*node) != FIELD_DECL
4028 && TREE_CODE (*node) != TYPE_DECL)
4030 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4031 IDENTIFIER_POINTER (name));
4032 *no_add_attrs = true;
4036 /* Can combine regparm with all attributes but fastcall. */
4037 if (is_attribute_p ("regparm", name))
4041 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4043 error ("fastcall and regparm attributes are not compatible");
4046 cst = TREE_VALUE (args);
4047 if (TREE_CODE (cst) != INTEGER_CST)
4049 warning (OPT_Wattributes,
4050 "%qs attribute requires an integer constant argument",
4051 IDENTIFIER_POINTER (name));
4052 *no_add_attrs = true;
4054 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4056 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4057 IDENTIFIER_POINTER (name), REGPARM_MAX);
4058 *no_add_attrs = true;
4062 && lookup_attribute (ix86_force_align_arg_pointer_string,
4063 TYPE_ATTRIBUTES (*node))
4064 && compare_tree_int (cst, REGPARM_MAX-1))
4066 error ("%s functions limited to %d register parameters",
4067 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
4075 /* Do not warn when emulating the MS ABI. */
4076 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4077 warning (OPT_Wattributes, "%qs attribute ignored",
4078 IDENTIFIER_POINTER (name));
4079 *no_add_attrs = true;
4083 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4084 if (is_attribute_p ("fastcall", name))
4086 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4088 error ("fastcall and cdecl attributes are not compatible");
4090 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4092 error ("fastcall and stdcall attributes are not compatible");
4094 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4096 error ("fastcall and regparm attributes are not compatible");
4100 /* Can combine stdcall with fastcall (redundant), regparm and
4102 else if (is_attribute_p ("stdcall", name))
4104 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4106 error ("stdcall and cdecl attributes are not compatible");
4108 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4110 error ("stdcall and fastcall attributes are not compatible");
4114 /* Can combine cdecl with regparm and sseregparm. */
4115 else if (is_attribute_p ("cdecl", name))
4117 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4119 error ("stdcall and cdecl attributes are not compatible");
4121 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4123 error ("fastcall and cdecl attributes are not compatible");
4127 /* Can combine sseregparm with all attributes. */
4132 /* Return 0 if the attributes for two types are incompatible, 1 if they
4133 are compatible, and 2 if they are nearly compatible (which causes a
4134 warning to be generated). */
4137 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4139 /* Check for mismatch of non-default calling convention. */
4140 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4142 if (TREE_CODE (type1) != FUNCTION_TYPE
4143 && TREE_CODE (type1) != METHOD_TYPE)
4146 /* Check for mismatched fastcall/regparm types. */
4147 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4148 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4149 || (ix86_function_regparm (type1, NULL)
4150 != ix86_function_regparm (type2, NULL)))
4153 /* Check for mismatched sseregparm types. */
4154 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4155 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4158 /* Check for mismatched return types (cdecl vs stdcall). */
4159 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4160 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4166 /* Return the regparm value for a function with the indicated TYPE and DECL.
4167 DECL may be NULL when calling function indirectly
4168 or considering a libcall. */
4171 ix86_function_regparm (const_tree type, const_tree decl)
4174 int regparm = ix86_regparm;
4176 static bool error_issued;
4180 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4182 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
4185 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4189 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4191 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4193 /* We can't use regparm(3) for nested functions because
4194 these pass static chain pointer in %ecx register. */
4195 if (!error_issued && regparm == 3
4196 && decl_function_context (decl)
4197 && !DECL_NO_STATIC_CHAIN (decl))
4199 error ("nested functions are limited to 2 register parameters");
4200 error_issued = true;
4208 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4211 /* Use register calling convention for local functions when possible. */
4212 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4215 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4216 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4219 int local_regparm, globals = 0, regno;
4222 /* Make sure no regparm register is taken by a
4223 fixed register variable. */
4224 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4225 if (fixed_regs[local_regparm])
4228 /* We can't use regparm(3) for nested functions as these use
4229 static chain pointer in third argument. */
4230 if (local_regparm == 3
4231 && (decl_function_context (decl)
4232 || ix86_force_align_arg_pointer)
4233 && !DECL_NO_STATIC_CHAIN (decl))
4236 /* If the function realigns its stackpointer, the prologue will
4237 clobber %ecx. If we've already generated code for the callee,
4238 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4239 scanning the attributes for the self-realigning property. */
4240 f = DECL_STRUCT_FUNCTION (decl);
4241 if (local_regparm == 3
4242 && (f ? !!f->machine->force_align_arg_pointer
4243 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
4244 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
4247 /* Each fixed register usage increases register pressure,
4248 so less registers should be used for argument passing.
4249 This functionality can be overriden by an explicit
4251 for (regno = 0; regno <= DI_REG; regno++)
4252 if (fixed_regs[regno])
4256 = globals < local_regparm ? local_regparm - globals : 0;
4258 if (local_regparm > regparm)
4259 regparm = local_regparm;
4266 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4267 DFmode (2) arguments in SSE registers for a function with the
4268 indicated TYPE and DECL. DECL may be NULL when calling function
4269 indirectly or considering a libcall. Otherwise return 0. */
4272 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4274 gcc_assert (!TARGET_64BIT);
4276 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4277 by the sseregparm attribute. */
4278 if (TARGET_SSEREGPARM
4279 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4286 error ("Calling %qD with attribute sseregparm without "
4287 "SSE/SSE2 enabled", decl);
4289 error ("Calling %qT with attribute sseregparm without "
4290 "SSE/SSE2 enabled", type);
4298 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4299 (and DFmode for SSE2) arguments in SSE registers. */
4300 if (decl && TARGET_SSE_MATH && !profile_flag)
4302 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4303 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4305 return TARGET_SSE2 ? 2 : 1;
4311 /* Return true if EAX is live at the start of the function. Used by
4312 ix86_expand_prologue to determine if we need special help before
4313 calling allocate_stack_worker. */
4316 ix86_eax_live_at_start_p (void)
4318 /* Cheat. Don't bother working forward from ix86_function_regparm
4319 to the function type to whether an actual argument is located in
4320 eax. Instead just look at cfg info, which is still close enough
4321 to correct at this point. This gives false positives for broken
4322 functions that might use uninitialized data that happens to be
4323 allocated in eax, but who cares? */
4324 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4327 /* Value is the number of bytes of arguments automatically
4328 popped when returning from a subroutine call.
4329 FUNDECL is the declaration node of the function (as a tree),
4330 FUNTYPE is the data type of the function (as a tree),
4331 or for a library call it is an identifier node for the subroutine name.
4332 SIZE is the number of bytes of arguments passed on the stack.
4334 On the 80386, the RTD insn may be used to pop them if the number
4335 of args is fixed, but if the number is variable then the caller
4336 must pop them all. RTD can't be used for library calls now
4337 because the library is compiled with the Unix compiler.
4338 Use of RTD is a selectable option, since it is incompatible with
4339 standard Unix calling sequences. If the option is not selected,
4340 the caller must always pop the args.
4342 The attribute stdcall is equivalent to RTD on a per module basis. */
4345 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4349 /* None of the 64-bit ABIs pop arguments. */
4353 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4355 /* Cdecl functions override -mrtd, and never pop the stack. */
4356 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4358 /* Stdcall and fastcall functions will pop the stack if not
4360 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4361 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4364 if (rtd && ! stdarg_p (funtype))
4368 /* Lose any fake structure return argument if it is passed on the stack. */
4369 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4370 && !KEEP_AGGREGATE_RETURN_POINTER)
4372 int nregs = ix86_function_regparm (funtype, fundecl);
4374 return GET_MODE_SIZE (Pmode);
4380 /* Argument support functions. */
4382 /* Return true when register may be used to pass function parameters. */
4384 ix86_function_arg_regno_p (int regno)
4387 const int *parm_regs;
4392 return (regno < REGPARM_MAX
4393 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4395 return (regno < REGPARM_MAX
4396 || (TARGET_MMX && MMX_REGNO_P (regno)
4397 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4398 || (TARGET_SSE && SSE_REGNO_P (regno)
4399 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4404 if (SSE_REGNO_P (regno) && TARGET_SSE)
4409 if (TARGET_SSE && SSE_REGNO_P (regno)
4410 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4414 /* TODO: The function should depend on current function ABI but
4415 builtins.c would need updating then. Therefore we use the
4418 /* RAX is used as hidden argument to va_arg functions. */
4419 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
4422 if (DEFAULT_ABI == MS_ABI)
4423 parm_regs = x86_64_ms_abi_int_parameter_registers;
4425 parm_regs = x86_64_int_parameter_registers;
4426 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4427 : X86_64_REGPARM_MAX); i++)
4428 if (regno == parm_regs[i])
4433 /* Return if we do not know how to pass TYPE solely in registers. */
4436 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4438 if (must_pass_in_stack_var_size_or_pad (mode, type))
4441 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4442 The layout_type routine is crafty and tries to trick us into passing
4443 currently unsupported vector types on the stack by using TImode. */
4444 return (!TARGET_64BIT && mode == TImode
4445 && type && TREE_CODE (type) != VECTOR_TYPE);
4448 /* It returns the size, in bytes, of the area reserved for arguments passed
4449 in registers for the function represented by fndecl dependent to the used
4452 ix86_reg_parm_stack_space (const_tree fndecl)
4455 /* For libcalls it is possible that there is no fndecl at hand.
4456 Therefore assume for this case the default abi of the target. */
4458 call_abi = DEFAULT_ABI;
4460 call_abi = ix86_function_abi (fndecl);
4466 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4469 ix86_function_type_abi (const_tree fntype)
4471 if (TARGET_64BIT && fntype != NULL)
4474 if (DEFAULT_ABI == SYSV_ABI)
4475 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4477 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4485 ix86_function_abi (const_tree fndecl)
4489 return ix86_function_type_abi (TREE_TYPE (fndecl));
4492 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4495 ix86_cfun_abi (void)
4497 if (! cfun || ! TARGET_64BIT)
4499 return cfun->machine->call_abi;
4503 extern void init_regs (void);
4505 /* Implementation of call abi switching target hook. Specific to FNDECL
4506 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4508 To prevent redudant calls of costy function init_regs (), it checks not to
4509 reset register usage for default abi. */
4511 ix86_call_abi_override (const_tree fndecl)
4513 if (fndecl == NULL_TREE)
4514 cfun->machine->call_abi = DEFAULT_ABI;
4516 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4517 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
4519 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
4521 call_used_regs[4 /*RSI*/] = 0;
4522 call_used_regs[5 /*RDI*/] = 0;
4526 else if (TARGET_64BIT)
4528 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
4530 call_used_regs[4 /*RSI*/] = 1;
4531 call_used_regs[5 /*RDI*/] = 1;
4537 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4538 for a call to a function whose data type is FNTYPE.
4539 For a library call, FNTYPE is 0. */
4542 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4543 tree fntype, /* tree ptr for function decl */
4544 rtx libname, /* SYMBOL_REF of library name or 0 */
4547 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4548 memset (cum, 0, sizeof (*cum));
4550 cum->call_abi = ix86_function_type_abi (fntype);
4551 /* Set up the number of registers to use for passing arguments. */
4552 cum->nregs = ix86_regparm;
4555 if (cum->call_abi != DEFAULT_ABI)
4556 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4561 cum->sse_nregs = SSE_REGPARM_MAX;
4564 if (cum->call_abi != DEFAULT_ABI)
4565 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4566 : X64_SSE_REGPARM_MAX;
4570 cum->mmx_nregs = MMX_REGPARM_MAX;
4571 cum->warn_sse = true;
4572 cum->warn_mmx = true;
4574 /* Because type might mismatch in between caller and callee, we need to
4575 use actual type of function for local calls.
4576 FIXME: cgraph_analyze can be told to actually record if function uses
4577 va_start so for local functions maybe_vaarg can be made aggressive
4579 FIXME: once typesytem is fixed, we won't need this code anymore. */
4581 fntype = TREE_TYPE (fndecl);
4582 cum->maybe_vaarg = (fntype
4583 ? (!prototype_p (fntype) || stdarg_p (fntype))
4588 /* If there are variable arguments, then we won't pass anything
4589 in registers in 32-bit mode. */
4590 if (stdarg_p (fntype))
4600 /* Use ecx and edx registers if function has fastcall attribute,
4601 else look for regparm information. */
4604 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4610 cum->nregs = ix86_function_regparm (fntype, fndecl);
4613 /* Set up the number of SSE registers used for passing SFmode
4614 and DFmode arguments. Warn for mismatching ABI. */
4615 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4619 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4620 But in the case of vector types, it is some vector mode.
4622 When we have only some of our vector isa extensions enabled, then there
4623 are some modes for which vector_mode_supported_p is false. For these
4624 modes, the generic vector support in gcc will choose some non-vector mode
4625 in order to implement the type. By computing the natural mode, we'll
4626 select the proper ABI location for the operand and not depend on whatever
4627 the middle-end decides to do with these vector types. */
4629 static enum machine_mode
4630 type_natural_mode (const_tree type)
4632 enum machine_mode mode = TYPE_MODE (type);
4634 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4636 HOST_WIDE_INT size = int_size_in_bytes (type);
4637 if ((size == 8 || size == 16)
4638 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4639 && TYPE_VECTOR_SUBPARTS (type) > 1)
4641 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4643 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4644 mode = MIN_MODE_VECTOR_FLOAT;
4646 mode = MIN_MODE_VECTOR_INT;
4648 /* Get the mode which has this inner mode and number of units. */
4649 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4650 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4651 && GET_MODE_INNER (mode) == innermode)
4661 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4662 this may not agree with the mode that the type system has chosen for the
4663 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4664 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4667 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4672 if (orig_mode != BLKmode)
4673 tmp = gen_rtx_REG (orig_mode, regno);
4676 tmp = gen_rtx_REG (mode, regno);
4677 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4678 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4684 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4685 of this code is to classify each 8bytes of incoming argument by the register
4686 class and assign registers accordingly. */
4688 /* Return the union class of CLASS1 and CLASS2.
4689 See the x86-64 PS ABI for details. */
4691 static enum x86_64_reg_class
4692 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4694 /* Rule #1: If both classes are equal, this is the resulting class. */
4695 if (class1 == class2)
4698 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4700 if (class1 == X86_64_NO_CLASS)
4702 if (class2 == X86_64_NO_CLASS)
4705 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4706 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4707 return X86_64_MEMORY_CLASS;
4709 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4710 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4711 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4712 return X86_64_INTEGERSI_CLASS;
4713 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4714 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4715 return X86_64_INTEGER_CLASS;
4717 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4719 if (class1 == X86_64_X87_CLASS
4720 || class1 == X86_64_X87UP_CLASS
4721 || class1 == X86_64_COMPLEX_X87_CLASS
4722 || class2 == X86_64_X87_CLASS
4723 || class2 == X86_64_X87UP_CLASS
4724 || class2 == X86_64_COMPLEX_X87_CLASS)
4725 return X86_64_MEMORY_CLASS;
4727 /* Rule #6: Otherwise class SSE is used. */
4728 return X86_64_SSE_CLASS;
4731 /* Classify the argument of type TYPE and mode MODE.
4732 CLASSES will be filled by the register class used to pass each word
4733 of the operand. The number of words is returned. In case the parameter
4734 should be passed in memory, 0 is returned. As a special case for zero
4735 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4737 BIT_OFFSET is used internally for handling records and specifies offset
4738 of the offset in bits modulo 256 to avoid overflow cases.
4740 See the x86-64 PS ABI for details.
4744 classify_argument (enum machine_mode mode, const_tree type,
4745 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4747 HOST_WIDE_INT bytes =
4748 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4749 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4751 /* Variable sized entities are always passed/returned in memory. */
4755 if (mode != VOIDmode
4756 && targetm.calls.must_pass_in_stack (mode, type))
4759 if (type && AGGREGATE_TYPE_P (type))
4763 enum x86_64_reg_class subclasses[MAX_CLASSES];
4765 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4769 for (i = 0; i < words; i++)
4770 classes[i] = X86_64_NO_CLASS;
4772 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4773 signalize memory class, so handle it as special case. */
4776 classes[0] = X86_64_NO_CLASS;
4780 /* Classify each field of record and merge classes. */
4781 switch (TREE_CODE (type))
4784 /* And now merge the fields of structure. */
4785 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4787 if (TREE_CODE (field) == FIELD_DECL)
4791 if (TREE_TYPE (field) == error_mark_node)
4794 /* Bitfields are always classified as integer. Handle them
4795 early, since later code would consider them to be
4796 misaligned integers. */
4797 if (DECL_BIT_FIELD (field))
4799 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4800 i < ((int_bit_position (field) + (bit_offset % 64))
4801 + tree_low_cst (DECL_SIZE (field), 0)
4804 merge_classes (X86_64_INTEGER_CLASS,
4809 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4810 TREE_TYPE (field), subclasses,
4811 (int_bit_position (field)
4812 + bit_offset) % 256);
4815 for (i = 0; i < num; i++)
4818 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4820 merge_classes (subclasses[i], classes[i + pos]);
4828 /* Arrays are handled as small records. */
4831 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4832 TREE_TYPE (type), subclasses, bit_offset);
4836 /* The partial classes are now full classes. */
4837 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4838 subclasses[0] = X86_64_SSE_CLASS;
4839 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
4840 subclasses[0] = X86_64_INTEGER_CLASS;
4842 for (i = 0; i < words; i++)
4843 classes[i] = subclasses[i % num];
4848 case QUAL_UNION_TYPE:
4849 /* Unions are similar to RECORD_TYPE but offset is always 0.
4851 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4853 if (TREE_CODE (field) == FIELD_DECL)
4857 if (TREE_TYPE (field) == error_mark_node)
4860 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4861 TREE_TYPE (field), subclasses,
4865 for (i = 0; i < num; i++)
4866 classes[i] = merge_classes (subclasses[i], classes[i]);
4875 /* Final merger cleanup. */
4876 for (i = 0; i < words; i++)
4878 /* If one class is MEMORY, everything should be passed in
4880 if (classes[i] == X86_64_MEMORY_CLASS)
4883 /* The X86_64_SSEUP_CLASS should be always preceded by
4884 X86_64_SSE_CLASS. */
4885 if (classes[i] == X86_64_SSEUP_CLASS
4886 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4887 classes[i] = X86_64_SSE_CLASS;
4889 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4890 if (classes[i] == X86_64_X87UP_CLASS
4891 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4892 classes[i] = X86_64_SSE_CLASS;
4897 /* Compute alignment needed. We align all types to natural boundaries with
4898 exception of XFmode that is aligned to 64bits. */
4899 if (mode != VOIDmode && mode != BLKmode)
4901 int mode_alignment = GET_MODE_BITSIZE (mode);
4904 mode_alignment = 128;
4905 else if (mode == XCmode)
4906 mode_alignment = 256;
4907 if (COMPLEX_MODE_P (mode))
4908 mode_alignment /= 2;
4909 /* Misaligned fields are always returned in memory. */
4910 if (bit_offset % mode_alignment)
4914 /* for V1xx modes, just use the base mode */
4915 if (VECTOR_MODE_P (mode) && mode != V1DImode
4916 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4917 mode = GET_MODE_INNER (mode);
4919 /* Classification of atomic types. */
4924 classes[0] = X86_64_SSE_CLASS;
4927 classes[0] = X86_64_SSE_CLASS;
4928 classes[1] = X86_64_SSEUP_CLASS;
4937 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4938 classes[0] = X86_64_INTEGERSI_CLASS;
4940 classes[0] = X86_64_INTEGER_CLASS;
4944 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4949 if (!(bit_offset % 64))
4950 classes[0] = X86_64_SSESF_CLASS;
4952 classes[0] = X86_64_SSE_CLASS;
4955 classes[0] = X86_64_SSEDF_CLASS;
4958 classes[0] = X86_64_X87_CLASS;
4959 classes[1] = X86_64_X87UP_CLASS;
4962 classes[0] = X86_64_SSE_CLASS;
4963 classes[1] = X86_64_SSEUP_CLASS;
4966 classes[0] = X86_64_SSE_CLASS;
4969 classes[0] = X86_64_SSEDF_CLASS;
4970 classes[1] = X86_64_SSEDF_CLASS;
4973 classes[0] = X86_64_COMPLEX_X87_CLASS;
4976 /* This modes is larger than 16 bytes. */
4984 classes[0] = X86_64_SSE_CLASS;
4985 classes[1] = X86_64_SSEUP_CLASS;
4992 classes[0] = X86_64_SSE_CLASS;
4998 gcc_assert (VECTOR_MODE_P (mode));
5003 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5005 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5006 classes[0] = X86_64_INTEGERSI_CLASS;
5008 classes[0] = X86_64_INTEGER_CLASS;
5009 classes[1] = X86_64_INTEGER_CLASS;
5010 return 1 + (bytes > 8);
5014 /* Examine the argument and return set number of register required in each
5015 class. Return 0 iff parameter should be passed in memory. */
5017 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5018 int *int_nregs, int *sse_nregs)
5020 enum x86_64_reg_class regclass[MAX_CLASSES];
5021 int n = classify_argument (mode, type, regclass, 0);
5027 for (n--; n >= 0; n--)
5028 switch (regclass[n])
5030 case X86_64_INTEGER_CLASS:
5031 case X86_64_INTEGERSI_CLASS:
5034 case X86_64_SSE_CLASS:
5035 case X86_64_SSESF_CLASS:
5036 case X86_64_SSEDF_CLASS:
5039 case X86_64_NO_CLASS:
5040 case X86_64_SSEUP_CLASS:
5042 case X86_64_X87_CLASS:
5043 case X86_64_X87UP_CLASS:
5047 case X86_64_COMPLEX_X87_CLASS:
5048 return in_return ? 2 : 0;
5049 case X86_64_MEMORY_CLASS:
5055 /* Construct container for the argument used by GCC interface. See
5056 FUNCTION_ARG for the detailed description. */
5059 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5060 const_tree type, int in_return, int nintregs, int nsseregs,
5061 const int *intreg, int sse_regno)
5063 /* The following variables hold the static issued_error state. */
5064 static bool issued_sse_arg_error;
5065 static bool issued_sse_ret_error;
5066 static bool issued_x87_ret_error;
5068 enum machine_mode tmpmode;
5070 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5071 enum x86_64_reg_class regclass[MAX_CLASSES];
5075 int needed_sseregs, needed_intregs;
5076 rtx exp[MAX_CLASSES];
5079 n = classify_argument (mode, type, regclass, 0);
5082 if (!examine_argument (mode, type, in_return, &needed_intregs,
5085 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5088 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5089 some less clueful developer tries to use floating-point anyway. */
5090 if (needed_sseregs && !TARGET_SSE)
5094 if (!issued_sse_ret_error)
5096 error ("SSE register return with SSE disabled");
5097 issued_sse_ret_error = true;
5100 else if (!issued_sse_arg_error)
5102 error ("SSE register argument with SSE disabled");
5103 issued_sse_arg_error = true;
5108 /* Likewise, error if the ABI requires us to return values in the
5109 x87 registers and the user specified -mno-80387. */
5110 if (!TARGET_80387 && in_return)
5111 for (i = 0; i < n; i++)
5112 if (regclass[i] == X86_64_X87_CLASS
5113 || regclass[i] == X86_64_X87UP_CLASS
5114 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5116 if (!issued_x87_ret_error)
5118 error ("x87 register return with x87 disabled");
5119 issued_x87_ret_error = true;
5124 /* First construct simple cases. Avoid SCmode, since we want to use
5125 single register to pass this type. */
5126 if (n == 1 && mode != SCmode)
5127 switch (regclass[0])
5129 case X86_64_INTEGER_CLASS:
5130 case X86_64_INTEGERSI_CLASS:
5131 return gen_rtx_REG (mode, intreg[0]);
5132 case X86_64_SSE_CLASS:
5133 case X86_64_SSESF_CLASS:
5134 case X86_64_SSEDF_CLASS:
5135 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5136 case X86_64_X87_CLASS:
5137 case X86_64_COMPLEX_X87_CLASS:
5138 return gen_rtx_REG (mode, FIRST_STACK_REG);
5139 case X86_64_NO_CLASS:
5140 /* Zero sized array, struct or class. */
5145 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5146 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5147 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5150 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5151 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5152 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5153 && regclass[1] == X86_64_INTEGER_CLASS
5154 && (mode == CDImode || mode == TImode || mode == TFmode)
5155 && intreg[0] + 1 == intreg[1])
5156 return gen_rtx_REG (mode, intreg[0]);
5158 /* Otherwise figure out the entries of the PARALLEL. */
5159 for (i = 0; i < n; i++)
5161 switch (regclass[i])
5163 case X86_64_NO_CLASS:
5165 case X86_64_INTEGER_CLASS:
5166 case X86_64_INTEGERSI_CLASS:
5167 /* Merge TImodes on aligned occasions here too. */
5168 if (i * 8 + 8 > bytes)
5169 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5170 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5174 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5175 if (tmpmode == BLKmode)
5177 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5178 gen_rtx_REG (tmpmode, *intreg),
5182 case X86_64_SSESF_CLASS:
5183 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5184 gen_rtx_REG (SFmode,
5185 SSE_REGNO (sse_regno)),
5189 case X86_64_SSEDF_CLASS:
5190 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5191 gen_rtx_REG (DFmode,
5192 SSE_REGNO (sse_regno)),
5196 case X86_64_SSE_CLASS:
5197 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5201 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5202 gen_rtx_REG (tmpmode,
5203 SSE_REGNO (sse_regno)),
5205 if (tmpmode == TImode)
5214 /* Empty aligned struct, union or class. */
5218 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5219 for (i = 0; i < nexps; i++)
5220 XVECEXP (ret, 0, i) = exp [i];
5224 /* Update the data in CUM to advance over an argument of mode MODE
5225 and data type TYPE. (TYPE is null for libcalls where that information
5226 may not be available.) */
5229 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5230 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5246 cum->words += words;
5247 cum->nregs -= words;
5248 cum->regno += words;
5250 if (cum->nregs <= 0)
5258 if (cum->float_in_sse < 2)
5261 if (cum->float_in_sse < 1)
5272 if (!type || !AGGREGATE_TYPE_P (type))
5274 cum->sse_words += words;
5275 cum->sse_nregs -= 1;
5276 cum->sse_regno += 1;
5277 if (cum->sse_nregs <= 0)
5290 if (!type || !AGGREGATE_TYPE_P (type))
5292 cum->mmx_words += words;
5293 cum->mmx_nregs -= 1;
5294 cum->mmx_regno += 1;
5295 if (cum->mmx_nregs <= 0)
5306 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5307 tree type, HOST_WIDE_INT words)
5309 int int_nregs, sse_nregs;
5311 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5312 cum->words += words;
5313 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5315 cum->nregs -= int_nregs;
5316 cum->sse_nregs -= sse_nregs;
5317 cum->regno += int_nregs;
5318 cum->sse_regno += sse_nregs;
5321 cum->words += words;
5325 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5326 HOST_WIDE_INT words)
5328 /* Otherwise, this should be passed indirect. */
5329 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5331 cum->words += words;
5340 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5341 tree type, int named ATTRIBUTE_UNUSED)
5343 HOST_WIDE_INT bytes, words;
5345 if (mode == BLKmode)
5346 bytes = int_size_in_bytes (type);
5348 bytes = GET_MODE_SIZE (mode);
5349 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5352 mode = type_natural_mode (type);
5354 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5355 function_arg_advance_ms_64 (cum, bytes, words);
5356 else if (TARGET_64BIT)
5357 function_arg_advance_64 (cum, mode, type, words);
5359 function_arg_advance_32 (cum, mode, type, bytes, words);
5362 /* Define where to put the arguments to a function.
5363 Value is zero to push the argument on the stack,
5364 or a hard register in which to store the argument.
5366 MODE is the argument's machine mode.
5367 TYPE is the data type of the argument (as a tree).
5368 This is null for libcalls where that information may
5370 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5371 the preceding args and about the function being called.
5372 NAMED is nonzero if this argument is a named parameter
5373 (otherwise it is an extra parameter matching an ellipsis). */
5376 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5377 enum machine_mode orig_mode, tree type,
5378 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5380 static bool warnedsse, warnedmmx;
5382 /* Avoid the AL settings for the Unix64 ABI. */
5383 if (mode == VOIDmode)
5399 if (words <= cum->nregs)
5401 int regno = cum->regno;
5403 /* Fastcall allocates the first two DWORD (SImode) or
5404 smaller arguments to ECX and EDX if it isn't an
5410 || (type && AGGREGATE_TYPE_P (type)))
5413 /* ECX not EAX is the first allocated register. */
5414 if (regno == AX_REG)
5417 return gen_rtx_REG (mode, regno);
5422 if (cum->float_in_sse < 2)
5425 if (cum->float_in_sse < 1)
5435 if (!type || !AGGREGATE_TYPE_P (type))
5437 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5440 warning (0, "SSE vector argument without SSE enabled "
5444 return gen_reg_or_parallel (mode, orig_mode,
5445 cum->sse_regno + FIRST_SSE_REG);
5454 if (!type || !AGGREGATE_TYPE_P (type))
5456 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5459 warning (0, "MMX vector argument without MMX enabled "
5463 return gen_reg_or_parallel (mode, orig_mode,
5464 cum->mmx_regno + FIRST_MMX_REG);
5473 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5474 enum machine_mode orig_mode, tree type)
5476 /* Handle a hidden AL argument containing number of registers
5477 for varargs x86-64 functions. */
5478 if (mode == VOIDmode)
5479 return GEN_INT (cum->maybe_vaarg
5480 ? (cum->sse_nregs < 0
5481 ? (cum->call_abi == DEFAULT_ABI
5483 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5484 : X64_SSE_REGPARM_MAX))
5488 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5490 &x86_64_int_parameter_registers [cum->regno],
5495 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5496 enum machine_mode orig_mode, int named,
5497 HOST_WIDE_INT bytes)
5501 /* Avoid the AL settings for the Unix64 ABI. */
5502 if (mode == VOIDmode)
5505 /* If we've run out of registers, it goes on the stack. */
5506 if (cum->nregs == 0)
5509 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5511 /* Only floating point modes are passed in anything but integer regs. */
5512 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5515 regno = cum->regno + FIRST_SSE_REG;
5520 /* Unnamed floating parameters are passed in both the
5521 SSE and integer registers. */
5522 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5523 t2 = gen_rtx_REG (mode, regno);
5524 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5525 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5526 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5529 /* Handle aggregated types passed in register. */
5530 if (orig_mode == BLKmode)
5532 if (bytes > 0 && bytes <= 8)
5533 mode = (bytes > 4 ? DImode : SImode);
5534 if (mode == BLKmode)
5538 return gen_reg_or_parallel (mode, orig_mode, regno);
5542 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5543 tree type, int named)
5545 enum machine_mode mode = omode;
5546 HOST_WIDE_INT bytes, words;
5548 if (mode == BLKmode)
5549 bytes = int_size_in_bytes (type);
5551 bytes = GET_MODE_SIZE (mode);
5552 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5554 /* To simplify the code below, represent vector types with a vector mode
5555 even if MMX/SSE are not active. */
5556 if (type && TREE_CODE (type) == VECTOR_TYPE)
5557 mode = type_natural_mode (type);
5559 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5560 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5561 else if (TARGET_64BIT)
5562 return function_arg_64 (cum, mode, omode, type);
5564 return function_arg_32 (cum, mode, omode, type, bytes, words);
5567 /* A C expression that indicates when an argument must be passed by
5568 reference. If nonzero for an argument, a copy of that argument is
5569 made in memory and a pointer to the argument is passed instead of
5570 the argument itself. The pointer is passed in whatever way is
5571 appropriate for passing a pointer to that type. */
5574 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5575 enum machine_mode mode ATTRIBUTE_UNUSED,
5576 const_tree type, bool named ATTRIBUTE_UNUSED)
5578 /* See Windows x64 Software Convention. */
5579 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5581 int msize = (int) GET_MODE_SIZE (mode);
5584 /* Arrays are passed by reference. */
5585 if (TREE_CODE (type) == ARRAY_TYPE)
5588 if (AGGREGATE_TYPE_P (type))
5590 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5591 are passed by reference. */
5592 msize = int_size_in_bytes (type);
5596 /* __m128 is passed by reference. */
5598 case 1: case 2: case 4: case 8:
5604 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5610 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
5613 contains_aligned_value_p (tree type)
5615 enum machine_mode mode = TYPE_MODE (type);
5616 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5620 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5622 if (TYPE_ALIGN (type) < 128)
5625 if (AGGREGATE_TYPE_P (type))
5627 /* Walk the aggregates recursively. */
5628 switch (TREE_CODE (type))
5632 case QUAL_UNION_TYPE:
5636 /* Walk all the structure fields. */
5637 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5639 if (TREE_CODE (field) == FIELD_DECL
5640 && contains_aligned_value_p (TREE_TYPE (field)))
5647 /* Just for use if some languages passes arrays by value. */
5648 if (contains_aligned_value_p (TREE_TYPE (type)))
5659 /* Gives the alignment boundary, in bits, of an argument with the
5660 specified mode and type. */
5663 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5668 /* Since canonical type is used for call, we convert it to
5669 canonical type if needed. */
5670 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5671 type = TYPE_CANONICAL (type);
5672 align = TYPE_ALIGN (type);
5675 align = GET_MODE_ALIGNMENT (mode);
5676 if (align < PARM_BOUNDARY)
5677 align = PARM_BOUNDARY;
5678 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5679 natural boundaries. */
5680 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5682 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5683 make an exception for SSE modes since these require 128bit
5686 The handling here differs from field_alignment. ICC aligns MMX
5687 arguments to 4 byte boundaries, while structure fields are aligned
5688 to 8 byte boundaries. */
5691 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5692 align = PARM_BOUNDARY;
5696 if (!contains_aligned_value_p (type))
5697 align = PARM_BOUNDARY;
5700 if (align > BIGGEST_ALIGNMENT)
5701 align = BIGGEST_ALIGNMENT;
5705 /* Return true if N is a possible register number of function value. */
5708 ix86_function_value_regno_p (int regno)
5715 case FIRST_FLOAT_REG:
5716 /* TODO: The function should depend on current function ABI but
5717 builtins.c would need updating then. Therefore we use the
5719 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5721 return TARGET_FLOAT_RETURNS_IN_80387;
5727 if (TARGET_MACHO || TARGET_64BIT)
5735 /* Define how to find the value returned by a function.
5736 VALTYPE is the data type of the value (as a tree).
5737 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5738 otherwise, FUNC is 0. */
5741 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5742 const_tree fntype, const_tree fn)
5746 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5747 we normally prevent this case when mmx is not available. However
5748 some ABIs may require the result to be returned like DImode. */
5749 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5750 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5752 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5753 we prevent this case when sse is not available. However some ABIs
5754 may require the result to be returned like integer TImode. */
5755 else if (mode == TImode
5756 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5757 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5759 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
5760 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5761 regno = FIRST_FLOAT_REG;
5763 /* Most things go in %eax. */
5766 /* Override FP return register with %xmm0 for local functions when
5767 SSE math is enabled or for functions with sseregparm attribute. */
5768 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5770 int sse_level = ix86_function_sseregparm (fntype, fn, false);
5771 if ((sse_level >= 1 && mode == SFmode)
5772 || (sse_level == 2 && mode == DFmode))
5773 regno = FIRST_SSE_REG;
5776 return gen_rtx_REG (orig_mode, regno);
5780 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5785 /* Handle libcalls, which don't provide a type node. */
5786 if (valtype == NULL)
5798 return gen_rtx_REG (mode, FIRST_SSE_REG);
5801 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
5805 return gen_rtx_REG (mode, AX_REG);
5809 ret = construct_container (mode, orig_mode, valtype, 1,
5810 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5811 x86_64_int_return_registers, 0);
5813 /* For zero sized structures, construct_container returns NULL, but we
5814 need to keep rest of compiler happy by returning meaningful value. */
5816 ret = gen_rtx_REG (orig_mode, AX_REG);
5822 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
5824 unsigned int regno = AX_REG;
5828 switch (GET_MODE_SIZE (mode))
5831 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5832 && !COMPLEX_MODE_P (mode))
5833 regno = FIRST_SSE_REG;
5837 if (mode == SFmode || mode == DFmode)
5838 regno = FIRST_SSE_REG;
5844 return gen_rtx_REG (orig_mode, regno);
5848 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
5849 enum machine_mode orig_mode, enum machine_mode mode)
5851 const_tree fn, fntype;
5854 if (fntype_or_decl && DECL_P (fntype_or_decl))
5855 fn = fntype_or_decl;
5856 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
5858 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
5859 return function_value_ms_64 (orig_mode, mode);
5860 else if (TARGET_64BIT)
5861 return function_value_64 (orig_mode, mode, valtype);
5863 return function_value_32 (orig_mode, mode, fntype, fn);
5867 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
5868 bool outgoing ATTRIBUTE_UNUSED)
5870 enum machine_mode mode, orig_mode;
5872 orig_mode = TYPE_MODE (valtype);
5873 mode = type_natural_mode (valtype);
5874 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
5878 ix86_libcall_value (enum machine_mode mode)
5880 return ix86_function_value_1 (NULL, NULL, mode, mode);
5883 /* Return true iff type is returned in memory. */
5885 static int ATTRIBUTE_UNUSED
5886 return_in_memory_32 (const_tree type, enum machine_mode mode)
5890 if (mode == BLKmode)
5893 size = int_size_in_bytes (type);
5895 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
5898 if (VECTOR_MODE_P (mode) || mode == TImode)
5900 /* User-created vectors small enough to fit in EAX. */
5904 /* MMX/3dNow values are returned in MM0,
5905 except when it doesn't exits. */
5907 return (TARGET_MMX ? 0 : 1);
5909 /* SSE values are returned in XMM0, except when it doesn't exist. */
5911 return (TARGET_SSE ? 0 : 1);
5922 static int ATTRIBUTE_UNUSED
5923 return_in_memory_64 (const_tree type, enum machine_mode mode)
5925 int needed_intregs, needed_sseregs;
5926 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
5929 static int ATTRIBUTE_UNUSED
5930 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
5932 HOST_WIDE_INT size = int_size_in_bytes (type);
5934 /* __m128 is returned in xmm0. */
5935 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5936 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
5939 /* Otherwise, the size must be exactly in [1248]. */
5940 return (size != 1 && size != 2 && size != 4 && size != 8);
5944 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5946 #ifdef SUBTARGET_RETURN_IN_MEMORY
5947 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
5949 const enum machine_mode mode = type_natural_mode (type);
5951 if (TARGET_64BIT_MS_ABI)
5952 return return_in_memory_ms_64 (type, mode);
5953 else if (TARGET_64BIT)
5954 return return_in_memory_64 (type, mode);
5956 return return_in_memory_32 (type, mode);
5960 /* Return false iff TYPE is returned in memory. This version is used
5961 on Solaris 10. It is similar to the generic ix86_return_in_memory,
5962 but differs notably in that when MMX is available, 8-byte vectors
5963 are returned in memory, rather than in MMX registers. */
5966 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5969 enum machine_mode mode = type_natural_mode (type);
5972 return return_in_memory_64 (type, mode);
5974 if (mode == BLKmode)
5977 size = int_size_in_bytes (type);
5979 if (VECTOR_MODE_P (mode))
5981 /* Return in memory only if MMX registers *are* available. This
5982 seems backwards, but it is consistent with the existing
5989 else if (mode == TImode)
5991 else if (mode == XFmode)
5997 /* When returning SSE vector types, we have a choice of either
5998 (1) being abi incompatible with a -march switch, or
5999 (2) generating an error.
6000 Given no good solution, I think the safest thing is one warning.
6001 The user won't be able to use -Werror, but....
6003 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6004 called in response to actually generating a caller or callee that
6005 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6006 via aggregate_value_p for general type probing from tree-ssa. */
6009 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6011 static bool warnedsse, warnedmmx;
6013 if (!TARGET_64BIT && type)
6015 /* Look at the return type of the function, not the function type. */
6016 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6018 if (!TARGET_SSE && !warnedsse)
6021 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6024 warning (0, "SSE vector return without SSE enabled "
6029 if (!TARGET_MMX && !warnedmmx)
6031 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6034 warning (0, "MMX vector return without MMX enabled "
6044 /* Create the va_list data type. */
6046 /* Returns the calling convention specific va_list date type.
6047 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6050 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6052 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6054 /* For i386 we use plain pointer to argument area. */
6055 if (!TARGET_64BIT || abi == MS_ABI)
6056 return build_pointer_type (char_type_node);
6058 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6059 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6061 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6062 unsigned_type_node);
6063 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6064 unsigned_type_node);
6065 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6067 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6070 va_list_gpr_counter_field = f_gpr;
6071 va_list_fpr_counter_field = f_fpr;
6073 DECL_FIELD_CONTEXT (f_gpr) = record;
6074 DECL_FIELD_CONTEXT (f_fpr) = record;
6075 DECL_FIELD_CONTEXT (f_ovf) = record;
6076 DECL_FIELD_CONTEXT (f_sav) = record;
6078 TREE_CHAIN (record) = type_decl;
6079 TYPE_NAME (record) = type_decl;
6080 TYPE_FIELDS (record) = f_gpr;
6081 TREE_CHAIN (f_gpr) = f_fpr;
6082 TREE_CHAIN (f_fpr) = f_ovf;
6083 TREE_CHAIN (f_ovf) = f_sav;
6085 layout_type (record);
6087 /* The correct type is an array type of one element. */
6088 return build_array_type (record, build_index_type (size_zero_node));
6091 /* Setup the builtin va_list data type and for 64-bit the additional
6092 calling convention specific va_list data types. */
6095 ix86_build_builtin_va_list (void)
6097 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6099 /* Initialize abi specific va_list builtin types. */
6103 if (DEFAULT_ABI == MS_ABI)
6105 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6106 if (TREE_CODE (t) != RECORD_TYPE)
6107 t = build_variant_type_copy (t);
6108 sysv_va_list_type_node = t;
6113 if (TREE_CODE (t) != RECORD_TYPE)
6114 t = build_variant_type_copy (t);
6115 sysv_va_list_type_node = t;
6117 if (DEFAULT_ABI != MS_ABI)
6119 t = ix86_build_builtin_va_list_abi (MS_ABI);
6120 if (TREE_CODE (t) != RECORD_TYPE)
6121 t = build_variant_type_copy (t);
6122 ms_va_list_type_node = t;
6127 if (TREE_CODE (t) != RECORD_TYPE)
6128 t = build_variant_type_copy (t);
6129 ms_va_list_type_node = t;
6136 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6139 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6148 int regparm = ix86_regparm;
6150 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
6151 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6153 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
6156 /* Indicate to allocate space on the stack for varargs save area. */
6157 ix86_save_varrargs_registers = 1;
6158 /* We need 16-byte stack alignment to save SSE registers. If user
6159 asked for lower preferred_stack_boundary, lets just hope that he knows
6160 what he is doing and won't varargs SSE values.
6162 We also may end up assuming that only 64bit values are stored in SSE
6163 register let some floating point program work. */
6164 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
6165 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
6167 save_area = frame_pointer_rtx;
6168 set = get_varargs_alias_set ();
6170 for (i = cum->regno;
6172 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6175 mem = gen_rtx_MEM (Pmode,
6176 plus_constant (save_area, i * UNITS_PER_WORD));
6177 MEM_NOTRAP_P (mem) = 1;
6178 set_mem_alias_set (mem, set);
6179 emit_move_insn (mem, gen_rtx_REG (Pmode,
6180 x86_64_int_parameter_registers[i]));
6183 if (cum->sse_nregs && cfun->va_list_fpr_size)
6185 /* Now emit code to save SSE registers. The AX parameter contains number
6186 of SSE parameter registers used to call this function. We use
6187 sse_prologue_save insn template that produces computed jump across
6188 SSE saves. We need some preparation work to get this working. */
6190 label = gen_label_rtx ();
6191 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6193 /* Compute address to jump to :
6194 label - eax*4 + nnamed_sse_arguments*4 */
6195 tmp_reg = gen_reg_rtx (Pmode);
6196 nsse_reg = gen_reg_rtx (Pmode);
6197 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6198 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6199 gen_rtx_MULT (Pmode, nsse_reg,
6204 gen_rtx_CONST (DImode,
6205 gen_rtx_PLUS (DImode,
6207 GEN_INT (cum->sse_regno * 4))));
6209 emit_move_insn (nsse_reg, label_ref);
6210 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6212 /* Compute address of memory block we save into. We always use pointer
6213 pointing 127 bytes after first byte to store - this is needed to keep
6214 instruction size limited by 4 bytes. */
6215 tmp_reg = gen_reg_rtx (Pmode);
6216 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6217 plus_constant (save_area,
6218 8 * X86_64_REGPARM_MAX + 127)));
6219 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6220 MEM_NOTRAP_P (mem) = 1;
6221 set_mem_alias_set (mem, set);
6222 set_mem_align (mem, BITS_PER_WORD);
6224 /* And finally do the dirty job! */
6225 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6226 GEN_INT (cum->sse_regno), label));
6231 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6233 alias_set_type set = get_varargs_alias_set ();
6236 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6240 mem = gen_rtx_MEM (Pmode,
6241 plus_constant (virtual_incoming_args_rtx,
6242 i * UNITS_PER_WORD));
6243 MEM_NOTRAP_P (mem) = 1;
6244 set_mem_alias_set (mem, set);
6246 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6247 emit_move_insn (mem, reg);
6252 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6253 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6256 CUMULATIVE_ARGS next_cum;
6259 /* This argument doesn't appear to be used anymore. Which is good,
6260 because the old code here didn't suppress rtl generation. */
6261 gcc_assert (!no_rtl);
6266 fntype = TREE_TYPE (current_function_decl);
6268 /* For varargs, we do not want to skip the dummy va_dcl argument.
6269 For stdargs, we do want to skip the last named argument. */
6271 if (stdarg_p (fntype))
6272 function_arg_advance (&next_cum, mode, type, 1);
6274 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
6275 setup_incoming_varargs_ms_64 (&next_cum);
6277 setup_incoming_varargs_64 (&next_cum);
6280 /* Checks if TYPE is of kind va_list char *. */
6283 is_va_list_char_pointer (tree type)
6287 /* For 32-bit it is always true. */
6290 canonic = ix86_canonical_va_list_type (type);
6291 return (canonic == ms_va_list_type_node
6292 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6295 /* Implement va_start. */
6298 ix86_va_start (tree valist, rtx nextarg)
6300 HOST_WIDE_INT words, n_gpr, n_fpr;
6301 tree f_gpr, f_fpr, f_ovf, f_sav;
6302 tree gpr, fpr, ovf, sav, t;
6305 /* Only 64bit target needs something special. */
6306 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6308 std_expand_builtin_va_start (valist, nextarg);
6312 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6313 f_fpr = TREE_CHAIN (f_gpr);
6314 f_ovf = TREE_CHAIN (f_fpr);
6315 f_sav = TREE_CHAIN (f_ovf);
6317 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6318 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6319 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6320 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6321 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6323 /* Count number of gp and fp argument registers used. */
6324 words = crtl->args.info.words;
6325 n_gpr = crtl->args.info.regno;
6326 n_fpr = crtl->args.info.sse_regno;
6328 if (cfun->va_list_gpr_size)
6330 type = TREE_TYPE (gpr);
6331 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
6332 build_int_cst (type, n_gpr * 8));
6333 TREE_SIDE_EFFECTS (t) = 1;
6334 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6337 if (cfun->va_list_fpr_size)
6339 type = TREE_TYPE (fpr);
6340 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
6341 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6342 TREE_SIDE_EFFECTS (t) = 1;
6343 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6346 /* Find the overflow area. */
6347 type = TREE_TYPE (ovf);
6348 t = make_tree (type, virtual_incoming_args_rtx);
6350 t = build2 (POINTER_PLUS_EXPR, type, t,
6351 size_int (words * UNITS_PER_WORD));
6352 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
6353 TREE_SIDE_EFFECTS (t) = 1;
6354 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6356 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
6358 /* Find the register save area.
6359 Prologue of the function save it right above stack frame. */
6360 type = TREE_TYPE (sav);
6361 t = make_tree (type, frame_pointer_rtx);
6362 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
6363 TREE_SIDE_EFFECTS (t) = 1;
6364 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6368 /* Implement va_arg. */
6371 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
6373 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6374 tree f_gpr, f_fpr, f_ovf, f_sav;
6375 tree gpr, fpr, ovf, sav, t;
6377 tree lab_false, lab_over = NULL_TREE;
6382 enum machine_mode nat_mode;
6385 /* Only 64bit target needs something special. */
6386 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6387 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6389 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6390 f_fpr = TREE_CHAIN (f_gpr);
6391 f_ovf = TREE_CHAIN (f_fpr);
6392 f_sav = TREE_CHAIN (f_ovf);
6394 valist = build_va_arg_indirect_ref (valist);
6395 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6396 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6397 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6398 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6400 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6402 type = build_pointer_type (type);
6403 size = int_size_in_bytes (type);
6404 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6406 nat_mode = type_natural_mode (type);
6407 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
6408 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6411 /* Pull the value out of the saved registers. */
6413 addr = create_tmp_var (ptr_type_node, "addr");
6414 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6418 int needed_intregs, needed_sseregs;
6420 tree int_addr, sse_addr;
6422 lab_false = create_artificial_label ();
6423 lab_over = create_artificial_label ();
6425 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6427 need_temp = (!REG_P (container)
6428 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6429 || TYPE_ALIGN (type) > 128));
6431 /* In case we are passing structure, verify that it is consecutive block
6432 on the register save area. If not we need to do moves. */
6433 if (!need_temp && !REG_P (container))
6435 /* Verify that all registers are strictly consecutive */
6436 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6440 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6442 rtx slot = XVECEXP (container, 0, i);
6443 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6444 || INTVAL (XEXP (slot, 1)) != i * 16)
6452 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6454 rtx slot = XVECEXP (container, 0, i);
6455 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6456 || INTVAL (XEXP (slot, 1)) != i * 8)
6468 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6469 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6470 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6471 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6474 /* First ensure that we fit completely in registers. */
6477 t = build_int_cst (TREE_TYPE (gpr),
6478 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6479 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6480 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6481 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6482 gimplify_and_add (t, pre_p);
6486 t = build_int_cst (TREE_TYPE (fpr),
6487 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6488 + X86_64_REGPARM_MAX * 8);
6489 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6490 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6491 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6492 gimplify_and_add (t, pre_p);
6495 /* Compute index to start of area used for integer regs. */
6498 /* int_addr = gpr + sav; */
6499 t = fold_convert (sizetype, gpr);
6500 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6501 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
6502 gimplify_and_add (t, pre_p);
6506 /* sse_addr = fpr + sav; */
6507 t = fold_convert (sizetype, fpr);
6508 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6509 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
6510 gimplify_and_add (t, pre_p);
6515 tree temp = create_tmp_var (type, "va_arg_tmp");
6518 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6519 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
6520 gimplify_and_add (t, pre_p);
6522 for (i = 0; i < XVECLEN (container, 0); i++)
6524 rtx slot = XVECEXP (container, 0, i);
6525 rtx reg = XEXP (slot, 0);
6526 enum machine_mode mode = GET_MODE (reg);
6527 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6528 tree addr_type = build_pointer_type (piece_type);
6531 tree dest_addr, dest;
6533 if (SSE_REGNO_P (REGNO (reg)))
6535 src_addr = sse_addr;
6536 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6540 src_addr = int_addr;
6541 src_offset = REGNO (reg) * 8;
6543 src_addr = fold_convert (addr_type, src_addr);
6544 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6545 size_int (src_offset));
6546 src = build_va_arg_indirect_ref (src_addr);
6548 dest_addr = fold_convert (addr_type, addr);
6549 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
6550 size_int (INTVAL (XEXP (slot, 1))));
6551 dest = build_va_arg_indirect_ref (dest_addr);
6553 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
6554 gimplify_and_add (t, pre_p);
6560 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6561 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6562 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
6563 gimplify_and_add (t, pre_p);
6567 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6568 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6569 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
6570 gimplify_and_add (t, pre_p);
6573 t = build1 (GOTO_EXPR, void_type_node, lab_over);
6574 gimplify_and_add (t, pre_p);
6576 t = build1 (LABEL_EXPR, void_type_node, lab_false);
6577 append_to_statement_list (t, pre_p);
6580 /* ... otherwise out of the overflow area. */
6582 /* When we align parameter on stack for caller, if the parameter
6583 alignment is beyond PREFERRED_STACK_BOUNDARY, it will be
6584 aligned at PREFERRED_STACK_BOUNDARY. We will match callee
6585 here with caller. */
6586 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6587 if ((unsigned int) arg_boundary > PREFERRED_STACK_BOUNDARY)
6588 arg_boundary = PREFERRED_STACK_BOUNDARY;
6590 /* Care for on-stack alignment if needed. */
6591 if (arg_boundary <= 64
6592 || integer_zerop (TYPE_SIZE (type)))
6596 HOST_WIDE_INT align = arg_boundary / 8;
6597 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6598 size_int (align - 1));
6599 t = fold_convert (sizetype, t);
6600 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6602 t = fold_convert (TREE_TYPE (ovf), t);
6604 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6606 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
6607 gimplify_and_add (t2, pre_p);
6609 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6610 size_int (rsize * UNITS_PER_WORD));
6611 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
6612 gimplify_and_add (t, pre_p);
6616 t = build1 (LABEL_EXPR, void_type_node, lab_over);
6617 append_to_statement_list (t, pre_p);
6620 ptrtype = build_pointer_type (type);
6621 addr = fold_convert (ptrtype, addr);
6624 addr = build_va_arg_indirect_ref (addr);
6625 return build_va_arg_indirect_ref (addr);
6628 /* Return nonzero if OPNUM's MEM should be matched
6629 in movabs* patterns. */
6632 ix86_check_movabs (rtx insn, int opnum)
6636 set = PATTERN (insn);
6637 if (GET_CODE (set) == PARALLEL)
6638 set = XVECEXP (set, 0, 0);
6639 gcc_assert (GET_CODE (set) == SET);
6640 mem = XEXP (set, opnum);
6641 while (GET_CODE (mem) == SUBREG)
6642 mem = SUBREG_REG (mem);
6643 gcc_assert (MEM_P (mem));
6644 return (volatile_ok || !MEM_VOLATILE_P (mem));
6647 /* Initialize the table of extra 80387 mathematical constants. */
6650 init_ext_80387_constants (void)
6652 static const char * cst[5] =
6654 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6655 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6656 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6657 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6658 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6662 for (i = 0; i < 5; i++)
6664 real_from_string (&ext_80387_constants_table[i], cst[i]);
6665 /* Ensure each constant is rounded to XFmode precision. */
6666 real_convert (&ext_80387_constants_table[i],
6667 XFmode, &ext_80387_constants_table[i]);
6670 ext_80387_constants_init = 1;
6673 /* Return true if the constant is something that can be loaded with
6674 a special instruction. */
6677 standard_80387_constant_p (rtx x)
6679 enum machine_mode mode = GET_MODE (x);
6683 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
6686 if (x == CONST0_RTX (mode))
6688 if (x == CONST1_RTX (mode))
6691 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6693 /* For XFmode constants, try to find a special 80387 instruction when
6694 optimizing for size or on those CPUs that benefit from them. */
6696 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
6700 if (! ext_80387_constants_init)
6701 init_ext_80387_constants ();
6703 for (i = 0; i < 5; i++)
6704 if (real_identical (&r, &ext_80387_constants_table[i]))
6708 /* Load of the constant -0.0 or -1.0 will be split as
6709 fldz;fchs or fld1;fchs sequence. */
6710 if (real_isnegzero (&r))
6712 if (real_identical (&r, &dconstm1))
6718 /* Return the opcode of the special instruction to be used to load
6722 standard_80387_constant_opcode (rtx x)
6724 switch (standard_80387_constant_p (x))
6748 /* Return the CONST_DOUBLE representing the 80387 constant that is
6749 loaded by the specified special instruction. The argument IDX
6750 matches the return value from standard_80387_constant_p. */
6753 standard_80387_constant_rtx (int idx)
6757 if (! ext_80387_constants_init)
6758 init_ext_80387_constants ();
6774 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
6778 /* Return 1 if mode is a valid mode for sse. */
6780 standard_sse_mode_p (enum machine_mode mode)
6797 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
6800 standard_sse_constant_p (rtx x)
6802 enum machine_mode mode = GET_MODE (x);
6804 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
6806 if (vector_all_ones_operand (x, mode)
6807 && standard_sse_mode_p (mode))
6808 return TARGET_SSE2 ? 2 : -1;
6813 /* Return the opcode of the special instruction to be used to load
6817 standard_sse_constant_opcode (rtx insn, rtx x)
6819 switch (standard_sse_constant_p (x))
6822 if (get_attr_mode (insn) == MODE_V4SF)
6823 return "xorps\t%0, %0";
6824 else if (get_attr_mode (insn) == MODE_V2DF)
6825 return "xorpd\t%0, %0";
6827 return "pxor\t%0, %0";
6829 return "pcmpeqd\t%0, %0";
6834 /* Returns 1 if OP contains a symbol reference */
6837 symbolic_reference_mentioned_p (rtx op)
6842 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
6845 fmt = GET_RTX_FORMAT (GET_CODE (op));
6846 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6852 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6853 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6857 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6864 /* Return 1 if it is appropriate to emit `ret' instructions in the
6865 body of a function. Do this only if the epilogue is simple, needing a
6866 couple of insns. Prior to reloading, we can't tell how many registers
6867 must be saved, so return 0 then. Return 0 if there is no frame
6868 marker to de-allocate. */
6871 ix86_can_use_return_insn_p (void)
6873 struct ix86_frame frame;
6875 if (! reload_completed || frame_pointer_needed)
6878 /* Don't allow more than 32 pop, since that's all we can do
6879 with one instruction. */
6880 if (crtl->args.pops_args
6881 && crtl->args.size >= 32768)
6884 ix86_compute_frame_layout (&frame);
6885 return frame.to_allocate == 0 && frame.nregs == 0;
6888 /* Value should be nonzero if functions must have frame pointers.
6889 Zero means the frame pointer need not be set up (and parms may
6890 be accessed via the stack pointer) in functions that seem suitable. */
6893 ix86_frame_pointer_required (void)
6895 /* If we accessed previous frames, then the generated code expects
6896 to be able to access the saved ebp value in our frame. */
6897 if (cfun->machine->accesses_prev_frame)
6900 /* Several x86 os'es need a frame pointer for other reasons,
6901 usually pertaining to setjmp. */
6902 if (SUBTARGET_FRAME_POINTER_REQUIRED)
6905 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
6906 the frame pointer by default. Turn it back on now if we've not
6907 got a leaf function. */
6908 if (TARGET_OMIT_LEAF_FRAME_POINTER
6909 && (!current_function_is_leaf
6910 || ix86_current_function_calls_tls_descriptor))
6919 /* Record that the current function accesses previous call frames. */
6922 ix86_setup_frame_addresses (void)
6924 cfun->machine->accesses_prev_frame = 1;
6927 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
6928 # define USE_HIDDEN_LINKONCE 1
6930 # define USE_HIDDEN_LINKONCE 0
6933 static int pic_labels_used;
6935 /* Fills in the label name that should be used for a pc thunk for
6936 the given register. */
6939 get_pc_thunk_name (char name[32], unsigned int regno)
6941 gcc_assert (!TARGET_64BIT);
6943 if (USE_HIDDEN_LINKONCE)
6944 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
6946 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6950 /* This function generates code for -fpic that loads %ebx with
6951 the return address of the caller and then returns. */
6954 ix86_file_end (void)
6959 for (regno = 0; regno < 8; ++regno)
6963 if (! ((pic_labels_used >> regno) & 1))
6966 get_pc_thunk_name (name, regno);
6971 switch_to_section (darwin_sections[text_coal_section]);
6972 fputs ("\t.weak_definition\t", asm_out_file);
6973 assemble_name (asm_out_file, name);
6974 fputs ("\n\t.private_extern\t", asm_out_file);
6975 assemble_name (asm_out_file, name);
6976 fputs ("\n", asm_out_file);
6977 ASM_OUTPUT_LABEL (asm_out_file, name);
6981 if (USE_HIDDEN_LINKONCE)
6985 decl = build_decl (FUNCTION_DECL, get_identifier (name),
6987 TREE_PUBLIC (decl) = 1;
6988 TREE_STATIC (decl) = 1;
6989 DECL_ONE_ONLY (decl) = 1;
6991 (*targetm.asm_out.unique_section) (decl, 0);
6992 switch_to_section (get_named_section (decl, NULL, 0));
6994 (*targetm.asm_out.globalize_label) (asm_out_file, name);
6995 fputs ("\t.hidden\t", asm_out_file);
6996 assemble_name (asm_out_file, name);
6997 fputc ('\n', asm_out_file);
6998 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7002 switch_to_section (text_section);
7003 ASM_OUTPUT_LABEL (asm_out_file, name);
7006 xops[0] = gen_rtx_REG (Pmode, regno);
7007 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7008 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7009 output_asm_insn ("ret", xops);
7012 if (NEED_INDICATE_EXEC_STACK)
7013 file_end_indicate_exec_stack ();
7016 /* Emit code for the SET_GOT patterns. */
7019 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7025 if (TARGET_VXWORKS_RTP && flag_pic)
7027 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7028 xops[2] = gen_rtx_MEM (Pmode,
7029 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7030 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7032 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7033 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7034 an unadorned address. */
7035 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7036 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7037 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7041 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7043 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7045 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7048 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7050 output_asm_insn ("call\t%a2", xops);
7053 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7054 is what will be referenced by the Mach-O PIC subsystem. */
7056 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7059 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7060 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7063 output_asm_insn ("pop%z0\t%0", xops);
7068 get_pc_thunk_name (name, REGNO (dest));
7069 pic_labels_used |= 1 << REGNO (dest);
7071 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7072 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7073 output_asm_insn ("call\t%X2", xops);
7074 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7075 is what will be referenced by the Mach-O PIC subsystem. */
7078 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7080 targetm.asm_out.internal_label (asm_out_file, "L",
7081 CODE_LABEL_NUMBER (label));
7088 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7089 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7091 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7096 /* Generate an "push" pattern for input ARG. */
7101 return gen_rtx_SET (VOIDmode,
7103 gen_rtx_PRE_DEC (Pmode,
7104 stack_pointer_rtx)),
7108 /* Return >= 0 if there is an unused call-clobbered register available
7109 for the entire function. */
7112 ix86_select_alt_pic_regnum (void)
7114 if (current_function_is_leaf && !crtl->profile
7115 && !ix86_current_function_calls_tls_descriptor)
7118 for (i = 2; i >= 0; --i)
7119 if (!df_regs_ever_live_p (i))
7123 return INVALID_REGNUM;
7126 /* Return 1 if we need to save REGNO. */
7128 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7130 if (pic_offset_table_rtx
7131 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7132 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7134 || crtl->calls_eh_return
7135 || crtl->uses_const_pool))
7137 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7142 if (crtl->calls_eh_return && maybe_eh_return)
7147 unsigned test = EH_RETURN_DATA_REGNO (i);
7148 if (test == INVALID_REGNUM)
7155 if (cfun->machine->force_align_arg_pointer
7156 && regno == REGNO (cfun->machine->force_align_arg_pointer))
7159 return (df_regs_ever_live_p (regno)
7160 && !call_used_regs[regno]
7161 && !fixed_regs[regno]
7162 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7165 /* Return number of registers to be saved on the stack. */
7168 ix86_nsaved_regs (void)
7173 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7174 if (ix86_save_reg (regno, true))
7179 /* Return the offset between two registers, one to be eliminated, and the other
7180 its replacement, at the start of a routine. */
7183 ix86_initial_elimination_offset (int from, int to)
7185 struct ix86_frame frame;
7186 ix86_compute_frame_layout (&frame);
7188 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7189 return frame.hard_frame_pointer_offset;
7190 else if (from == FRAME_POINTER_REGNUM
7191 && to == HARD_FRAME_POINTER_REGNUM)
7192 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7195 gcc_assert (to == STACK_POINTER_REGNUM);
7197 if (from == ARG_POINTER_REGNUM)
7198 return frame.stack_pointer_offset;
7200 gcc_assert (from == FRAME_POINTER_REGNUM);
7201 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7205 /* Fill structure ix86_frame about frame of currently computed function. */
7208 ix86_compute_frame_layout (struct ix86_frame *frame)
7210 HOST_WIDE_INT total_size;
7211 unsigned int stack_alignment_needed;
7212 HOST_WIDE_INT offset;
7213 unsigned int preferred_alignment;
7214 HOST_WIDE_INT size = get_frame_size ();
7216 frame->nregs = ix86_nsaved_regs ();
7219 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7220 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7222 /* During reload iteration the amount of registers saved can change.
7223 Recompute the value as needed. Do not recompute when amount of registers
7224 didn't change as reload does multiple calls to the function and does not
7225 expect the decision to change within single iteration. */
7227 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7229 int count = frame->nregs;
7231 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7232 /* The fast prologue uses move instead of push to save registers. This
7233 is significantly longer, but also executes faster as modern hardware
7234 can execute the moves in parallel, but can't do that for push/pop.
7236 Be careful about choosing what prologue to emit: When function takes
7237 many instructions to execute we may use slow version as well as in
7238 case function is known to be outside hot spot (this is known with
7239 feedback only). Weight the size of function by number of registers
7240 to save as it is cheap to use one or two push instructions but very
7241 slow to use many of them. */
7243 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7244 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7245 || (flag_branch_probabilities
7246 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7247 cfun->machine->use_fast_prologue_epilogue = false;
7249 cfun->machine->use_fast_prologue_epilogue
7250 = !expensive_function_p (count);
7252 if (TARGET_PROLOGUE_USING_MOVE
7253 && cfun->machine->use_fast_prologue_epilogue)
7254 frame->save_regs_using_mov = true;
7256 frame->save_regs_using_mov = false;
7259 /* Skip return address and saved base pointer. */
7260 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7262 frame->hard_frame_pointer_offset = offset;
7264 /* Do some sanity checking of stack_alignment_needed and
7265 preferred_alignment, since i386 port is the only using those features
7266 that may break easily. */
7268 gcc_assert (!size || stack_alignment_needed);
7269 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7270 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
7271 gcc_assert (stack_alignment_needed
7272 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
7274 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
7275 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
7277 /* Register save area */
7278 offset += frame->nregs * UNITS_PER_WORD;
7281 if (ix86_save_varrargs_registers)
7283 offset += X86_64_VARARGS_SIZE;
7284 frame->va_arg_size = X86_64_VARARGS_SIZE;
7287 frame->va_arg_size = 0;
7289 /* Align start of frame for local function. */
7290 frame->padding1 = ((offset + stack_alignment_needed - 1)
7291 & -stack_alignment_needed) - offset;
7293 offset += frame->padding1;
7295 /* Frame pointer points here. */
7296 frame->frame_pointer_offset = offset;
7300 /* Add outgoing arguments area. Can be skipped if we eliminated
7301 all the function calls as dead code.
7302 Skipping is however impossible when function calls alloca. Alloca
7303 expander assumes that last crtl->outgoing_args_size
7304 of stack frame are unused. */
7305 if (ACCUMULATE_OUTGOING_ARGS
7306 && (!current_function_is_leaf || cfun->calls_alloca
7307 || ix86_current_function_calls_tls_descriptor))
7309 offset += crtl->outgoing_args_size;
7310 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7313 frame->outgoing_arguments_size = 0;
7315 /* Align stack boundary. Only needed if we're calling another function
7317 if (!current_function_is_leaf || cfun->calls_alloca
7318 || ix86_current_function_calls_tls_descriptor)
7319 frame->padding2 = ((offset + preferred_alignment - 1)
7320 & -preferred_alignment) - offset;
7322 frame->padding2 = 0;
7324 offset += frame->padding2;
7326 /* We've reached end of stack frame. */
7327 frame->stack_pointer_offset = offset;
7329 /* Size prologue needs to allocate. */
7330 frame->to_allocate =
7331 (size + frame->padding1 + frame->padding2
7332 + frame->outgoing_arguments_size + frame->va_arg_size);
7334 if ((!frame->to_allocate && frame->nregs <= 1)
7335 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7336 frame->save_regs_using_mov = false;
7338 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7339 && current_function_is_leaf
7340 && !ix86_current_function_calls_tls_descriptor)
7342 frame->red_zone_size = frame->to_allocate;
7343 if (frame->save_regs_using_mov)
7344 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7345 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7346 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7349 frame->red_zone_size = 0;
7350 frame->to_allocate -= frame->red_zone_size;
7351 frame->stack_pointer_offset -= frame->red_zone_size;
7353 fprintf (stderr, "\n");
7354 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7355 fprintf (stderr, "size: %ld\n", (long)size);
7356 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7357 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7358 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7359 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7360 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7361 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7362 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7363 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7364 (long)frame->hard_frame_pointer_offset);
7365 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7366 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7367 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7368 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7372 /* Emit code to save registers in the prologue. */
7375 ix86_emit_save_regs (void)
7380 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
7381 if (ix86_save_reg (regno, true))
7383 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7384 RTX_FRAME_RELATED_P (insn) = 1;
7388 /* Emit code to save registers using MOV insns. First register
7389 is restored from POINTER + OFFSET. */
7391 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7396 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7397 if (ix86_save_reg (regno, true))
7399 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7401 gen_rtx_REG (Pmode, regno));
7402 RTX_FRAME_RELATED_P (insn) = 1;
7403 offset += UNITS_PER_WORD;
7407 /* Expand prologue or epilogue stack adjustment.
7408 The pattern exist to put a dependency on all ebp-based memory accesses.
7409 STYLE should be negative if instructions should be marked as frame related,
7410 zero if %r11 register is live and cannot be freely used and positive
7414 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7419 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7420 else if (x86_64_immediate_operand (offset, DImode))
7421 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7425 /* r11 is used by indirect sibcall return as well, set before the
7426 epilogue and used after the epilogue. ATM indirect sibcall
7427 shouldn't be used together with huge frame sizes in one
7428 function because of the frame_size check in sibcall.c. */
7430 r11 = gen_rtx_REG (DImode, R11_REG);
7431 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7433 RTX_FRAME_RELATED_P (insn) = 1;
7434 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7438 RTX_FRAME_RELATED_P (insn) = 1;
7441 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7444 ix86_internal_arg_pointer (void)
7446 bool has_force_align_arg_pointer =
7447 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
7448 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
7449 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
7450 && DECL_NAME (current_function_decl)
7451 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7452 && DECL_FILE_SCOPE_P (current_function_decl))
7453 || ix86_force_align_arg_pointer
7454 || has_force_align_arg_pointer)
7456 /* Nested functions can't realign the stack due to a register
7458 if (DECL_CONTEXT (current_function_decl)
7459 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
7461 if (ix86_force_align_arg_pointer)
7462 warning (0, "-mstackrealign ignored for nested functions");
7463 if (has_force_align_arg_pointer)
7464 error ("%s not supported for nested functions",
7465 ix86_force_align_arg_pointer_string);
7466 return virtual_incoming_args_rtx;
7468 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
7469 return copy_to_reg (cfun->machine->force_align_arg_pointer);
7472 return virtual_incoming_args_rtx;
7475 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7476 This is called from dwarf2out.c to emit call frame instructions
7477 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
7479 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7481 rtx unspec = SET_SRC (pattern);
7482 gcc_assert (GET_CODE (unspec) == UNSPEC);
7486 case UNSPEC_REG_SAVE:
7487 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7488 SET_DEST (pattern));
7490 case UNSPEC_DEF_CFA:
7491 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7492 INTVAL (XVECEXP (unspec, 0, 0)));
7499 /* Expand the prologue into a bunch of separate insns. */
7502 ix86_expand_prologue (void)
7506 struct ix86_frame frame;
7507 HOST_WIDE_INT allocate;
7509 ix86_compute_frame_layout (&frame);
7511 if (cfun->machine->force_align_arg_pointer)
7515 /* Grab the argument pointer. */
7516 x = plus_constant (stack_pointer_rtx, 4);
7517 y = cfun->machine->force_align_arg_pointer;
7518 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
7519 RTX_FRAME_RELATED_P (insn) = 1;
7521 /* The unwind info consists of two parts: install the fafp as the cfa,
7522 and record the fafp as the "save register" of the stack pointer.
7523 The later is there in order that the unwinder can see where it
7524 should restore the stack pointer across the and insn. */
7525 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
7526 x = gen_rtx_SET (VOIDmode, y, x);
7527 RTX_FRAME_RELATED_P (x) = 1;
7528 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
7530 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
7531 RTX_FRAME_RELATED_P (y) = 1;
7532 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
7533 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
7534 REG_NOTES (insn) = x;
7536 /* Align the stack. */
7537 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
7540 /* And here we cheat like madmen with the unwind info. We force the
7541 cfa register back to sp+4, which is exactly what it was at the
7542 start of the function. Re-pushing the return address results in
7543 the return at the same spot relative to the cfa, and thus is
7544 correct wrt the unwind info. */
7545 x = cfun->machine->force_align_arg_pointer;
7546 x = gen_frame_mem (Pmode, plus_constant (x, -4));
7547 insn = emit_insn (gen_push (x));
7548 RTX_FRAME_RELATED_P (insn) = 1;
7551 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
7552 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
7553 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
7554 REG_NOTES (insn) = x;
7557 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7558 slower on all targets. Also sdb doesn't like it. */
7560 if (frame_pointer_needed)
7562 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7563 RTX_FRAME_RELATED_P (insn) = 1;
7565 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7566 RTX_FRAME_RELATED_P (insn) = 1;
7569 allocate = frame.to_allocate;
7571 if (!frame.save_regs_using_mov)
7572 ix86_emit_save_regs ();
7574 allocate += frame.nregs * UNITS_PER_WORD;
7576 /* When using red zone we may start register saving before allocating
7577 the stack frame saving one cycle of the prologue. However I will
7578 avoid doing this if I am going to have to probe the stack since
7579 at least on x86_64 the stack probe can turn into a call that clobbers
7580 a red zone location */
7581 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
7582 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
7583 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
7584 : stack_pointer_rtx,
7585 -frame.nregs * UNITS_PER_WORD);
7589 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
7590 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7591 GEN_INT (-allocate), -1);
7594 /* Only valid for Win32. */
7595 rtx eax = gen_rtx_REG (Pmode, AX_REG);
7599 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
7601 if (cfun->machine->call_abi == MS_ABI)
7604 eax_live = ix86_eax_live_at_start_p ();
7608 emit_insn (gen_push (eax));
7609 allocate -= UNITS_PER_WORD;
7612 emit_move_insn (eax, GEN_INT (allocate));
7615 insn = gen_allocate_stack_worker_64 (eax);
7617 insn = gen_allocate_stack_worker_32 (eax);
7618 insn = emit_insn (insn);
7619 RTX_FRAME_RELATED_P (insn) = 1;
7620 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
7621 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
7622 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7623 t, REG_NOTES (insn));
7627 if (frame_pointer_needed)
7628 t = plus_constant (hard_frame_pointer_rtx,
7631 - frame.nregs * UNITS_PER_WORD);
7633 t = plus_constant (stack_pointer_rtx, allocate);
7634 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
7638 if (frame.save_regs_using_mov
7639 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
7640 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
7642 if (!frame_pointer_needed || !frame.to_allocate)
7643 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
7645 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
7646 -frame.nregs * UNITS_PER_WORD);
7649 pic_reg_used = false;
7650 if (pic_offset_table_rtx
7651 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7654 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
7656 if (alt_pic_reg_used != INVALID_REGNUM)
7657 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
7659 pic_reg_used = true;
7666 if (ix86_cmodel == CM_LARGE_PIC)
7668 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
7669 rtx label = gen_label_rtx ();
7671 LABEL_PRESERVE_P (label) = 1;
7672 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
7673 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
7674 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7675 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
7676 pic_offset_table_rtx, tmp_reg));
7679 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7682 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
7685 /* Prevent function calls from being scheduled before the call to mcount.
7686 In the pic_reg_used case, make sure that the got load isn't deleted. */
7690 emit_insn (gen_prologue_use (pic_offset_table_rtx));
7691 emit_insn (gen_blockage ());
7694 /* Emit cld instruction if stringops are used in the function. */
7695 if (TARGET_CLD && ix86_current_function_needs_cld)
7696 emit_insn (gen_cld ());
7699 /* Emit code to restore saved registers using MOV insns. First register
7700 is restored from POINTER + OFFSET. */
7702 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
7703 int maybe_eh_return)
7706 rtx base_address = gen_rtx_MEM (Pmode, pointer);
7708 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7709 if (ix86_save_reg (regno, maybe_eh_return))
7711 /* Ensure that adjust_address won't be forced to produce pointer
7712 out of range allowed by x86-64 instruction set. */
7713 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
7717 r11 = gen_rtx_REG (DImode, R11_REG);
7718 emit_move_insn (r11, GEN_INT (offset));
7719 emit_insn (gen_adddi3 (r11, r11, pointer));
7720 base_address = gen_rtx_MEM (Pmode, r11);
7723 emit_move_insn (gen_rtx_REG (Pmode, regno),
7724 adjust_address (base_address, Pmode, offset));
7725 offset += UNITS_PER_WORD;
7729 /* Restore function stack, frame, and registers. */
7732 ix86_expand_epilogue (int style)
7735 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
7736 struct ix86_frame frame;
7737 HOST_WIDE_INT offset;
7739 ix86_compute_frame_layout (&frame);
7741 /* Calculate start of saved registers relative to ebp. Special care
7742 must be taken for the normal return case of a function using
7743 eh_return: the eax and edx registers are marked as saved, but not
7744 restored along this path. */
7745 offset = frame.nregs;
7746 if (crtl->calls_eh_return && style != 2)
7748 offset *= -UNITS_PER_WORD;
7750 /* If we're only restoring one register and sp is not valid then
7751 using a move instruction to restore the register since it's
7752 less work than reloading sp and popping the register.
7754 The default code result in stack adjustment using add/lea instruction,
7755 while this code results in LEAVE instruction (or discrete equivalent),
7756 so it is profitable in some other cases as well. Especially when there
7757 are no registers to restore. We also use this code when TARGET_USE_LEAVE
7758 and there is exactly one register to pop. This heuristic may need some
7759 tuning in future. */
7760 if ((!sp_valid && frame.nregs <= 1)
7761 || (TARGET_EPILOGUE_USING_MOVE
7762 && cfun->machine->use_fast_prologue_epilogue
7763 && (frame.nregs > 1 || frame.to_allocate))
7764 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
7765 || (frame_pointer_needed && TARGET_USE_LEAVE
7766 && cfun->machine->use_fast_prologue_epilogue
7767 && frame.nregs == 1)
7768 || crtl->calls_eh_return)
7770 /* Restore registers. We can use ebp or esp to address the memory
7771 locations. If both are available, default to ebp, since offsets
7772 are known to be small. Only exception is esp pointing directly to the
7773 end of block of saved registers, where we may simplify addressing
7776 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
7777 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
7778 frame.to_allocate, style == 2);
7780 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
7781 offset, style == 2);
7783 /* eh_return epilogues need %ecx added to the stack pointer. */
7786 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
7788 if (frame_pointer_needed)
7790 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
7791 tmp = plus_constant (tmp, UNITS_PER_WORD);
7792 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
7794 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
7795 emit_move_insn (hard_frame_pointer_rtx, tmp);
7797 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
7802 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
7803 tmp = plus_constant (tmp, (frame.to_allocate
7804 + frame.nregs * UNITS_PER_WORD));
7805 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
7808 else if (!frame_pointer_needed)
7809 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7810 GEN_INT (frame.to_allocate
7811 + frame.nregs * UNITS_PER_WORD),
7813 /* If not an i386, mov & pop is faster than "leave". */
7814 else if (TARGET_USE_LEAVE || optimize_size
7815 || !cfun->machine->use_fast_prologue_epilogue)
7816 emit_insn ((*ix86_gen_leave) ());
7819 pro_epilogue_adjust_stack (stack_pointer_rtx,
7820 hard_frame_pointer_rtx,
7823 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
7828 /* First step is to deallocate the stack frame so that we can
7829 pop the registers. */
7832 gcc_assert (frame_pointer_needed);
7833 pro_epilogue_adjust_stack (stack_pointer_rtx,
7834 hard_frame_pointer_rtx,
7835 GEN_INT (offset), style);
7837 else if (frame.to_allocate)
7838 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7839 GEN_INT (frame.to_allocate), style);
7841 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7842 if (ix86_save_reg (regno, false))
7843 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
7844 if (frame_pointer_needed)
7846 /* Leave results in shorter dependency chains on CPUs that are
7847 able to grok it fast. */
7848 if (TARGET_USE_LEAVE)
7849 emit_insn ((*ix86_gen_leave) ());
7851 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
7855 if (cfun->machine->force_align_arg_pointer)
7857 emit_insn (gen_addsi3 (stack_pointer_rtx,
7858 cfun->machine->force_align_arg_pointer,
7862 /* Sibcall epilogues don't want a return instruction. */
7866 if (crtl->args.pops_args && crtl->args.size)
7868 rtx popc = GEN_INT (crtl->args.pops_args);
7870 /* i386 can only pop 64K bytes. If asked to pop more, pop
7871 return address, do explicit add, and jump indirectly to the
7874 if (crtl->args.pops_args >= 65536)
7876 rtx ecx = gen_rtx_REG (SImode, CX_REG);
7878 /* There is no "pascal" calling convention in any 64bit ABI. */
7879 gcc_assert (!TARGET_64BIT);
7881 emit_insn (gen_popsi1 (ecx));
7882 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
7883 emit_jump_insn (gen_return_indirect_internal (ecx));
7886 emit_jump_insn (gen_return_pop_internal (popc));
7889 emit_jump_insn (gen_return_internal ());
7892 /* Reset from the function's potential modifications. */
7895 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7896 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7898 if (pic_offset_table_rtx)
7899 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
7901 /* Mach-O doesn't support labels at the end of objects, so if
7902 it looks like we might want one, insert a NOP. */
7904 rtx insn = get_last_insn ();
7907 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
7908 insn = PREV_INSN (insn);
7912 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
7913 fputs ("\tnop\n", file);
7919 /* Extract the parts of an RTL expression that is a valid memory address
7920 for an instruction. Return 0 if the structure of the address is
7921 grossly off. Return -1 if the address contains ASHIFT, so it is not
7922 strictly valid, but still used for computing length of lea instruction. */
7925 ix86_decompose_address (rtx addr, struct ix86_address *out)
7927 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
7928 rtx base_reg, index_reg;
7929 HOST_WIDE_INT scale = 1;
7930 rtx scale_rtx = NULL_RTX;
7932 enum ix86_address_seg seg = SEG_DEFAULT;
7934 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
7936 else if (GET_CODE (addr) == PLUS)
7946 addends[n++] = XEXP (op, 1);
7949 while (GET_CODE (op) == PLUS);
7954 for (i = n; i >= 0; --i)
7957 switch (GET_CODE (op))
7962 index = XEXP (op, 0);
7963 scale_rtx = XEXP (op, 1);
7967 if (XINT (op, 1) == UNSPEC_TP
7968 && TARGET_TLS_DIRECT_SEG_REFS
7969 && seg == SEG_DEFAULT)
7970 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
7999 else if (GET_CODE (addr) == MULT)
8001 index = XEXP (addr, 0); /* index*scale */
8002 scale_rtx = XEXP (addr, 1);
8004 else if (GET_CODE (addr) == ASHIFT)
8008 /* We're called for lea too, which implements ashift on occasion. */
8009 index = XEXP (addr, 0);
8010 tmp = XEXP (addr, 1);
8011 if (!CONST_INT_P (tmp))
8013 scale = INTVAL (tmp);
8014 if ((unsigned HOST_WIDE_INT) scale > 3)
8020 disp = addr; /* displacement */
8022 /* Extract the integral value of scale. */
8025 if (!CONST_INT_P (scale_rtx))
8027 scale = INTVAL (scale_rtx);
8030 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8031 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8033 /* Allow arg pointer and stack pointer as index if there is not scaling. */
8034 if (base_reg && index_reg && scale == 1
8035 && (index_reg == arg_pointer_rtx
8036 || index_reg == frame_pointer_rtx
8037 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8040 tmp = base, base = index, index = tmp;
8041 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8044 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8045 if ((base_reg == hard_frame_pointer_rtx
8046 || base_reg == frame_pointer_rtx
8047 || base_reg == arg_pointer_rtx) && !disp)
8050 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8051 Avoid this by transforming to [%esi+0]. */
8052 if (TARGET_K6 && !optimize_size
8053 && base_reg && !index_reg && !disp
8055 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8058 /* Special case: encode reg+reg instead of reg*2. */
8059 if (!base && index && scale && scale == 2)
8060 base = index, base_reg = index_reg, scale = 1;
8062 /* Special case: scaling cannot be encoded without base or displacement. */
8063 if (!base && !disp && index && scale != 1)
8075 /* Return cost of the memory address x.
8076 For i386, it is better to use a complex address than let gcc copy
8077 the address into a reg and make a new pseudo. But not if the address
8078 requires to two regs - that would mean more pseudos with longer
8081 ix86_address_cost (rtx x)
8083 struct ix86_address parts;
8085 int ok = ix86_decompose_address (x, &parts);
8089 if (parts.base && GET_CODE (parts.base) == SUBREG)
8090 parts.base = SUBREG_REG (parts.base);
8091 if (parts.index && GET_CODE (parts.index) == SUBREG)
8092 parts.index = SUBREG_REG (parts.index);
8094 /* Attempt to minimize number of registers in the address. */
8096 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8098 && (!REG_P (parts.index)
8099 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8103 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8105 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8106 && parts.base != parts.index)
8109 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8110 since it's predecode logic can't detect the length of instructions
8111 and it degenerates to vector decoded. Increase cost of such
8112 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8113 to split such addresses or even refuse such addresses at all.
8115 Following addressing modes are affected:
8120 The first and last case may be avoidable by explicitly coding the zero in
8121 memory address, but I don't have AMD-K6 machine handy to check this
8125 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8126 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8127 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8133 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8134 this is used for to form addresses to local data when -fPIC is in
8138 darwin_local_data_pic (rtx disp)
8140 if (GET_CODE (disp) == MINUS)
8142 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
8143 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
8144 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
8146 const char *sym_name = XSTR (XEXP (disp, 1), 0);
8147 if (! strcmp (sym_name, "<pic base>"))
8155 /* Determine if a given RTX is a valid constant. We already know this
8156 satisfies CONSTANT_P. */
8159 legitimate_constant_p (rtx x)
8161 switch (GET_CODE (x))
8166 if (GET_CODE (x) == PLUS)
8168 if (!CONST_INT_P (XEXP (x, 1)))
8173 if (TARGET_MACHO && darwin_local_data_pic (x))
8176 /* Only some unspecs are valid as "constants". */
8177 if (GET_CODE (x) == UNSPEC)
8178 switch (XINT (x, 1))
8183 return TARGET_64BIT;
8186 x = XVECEXP (x, 0, 0);
8187 return (GET_CODE (x) == SYMBOL_REF
8188 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8190 x = XVECEXP (x, 0, 0);
8191 return (GET_CODE (x) == SYMBOL_REF
8192 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8197 /* We must have drilled down to a symbol. */
8198 if (GET_CODE (x) == LABEL_REF)
8200 if (GET_CODE (x) != SYMBOL_REF)
8205 /* TLS symbols are never valid. */
8206 if (SYMBOL_REF_TLS_MODEL (x))
8209 /* DLLIMPORT symbols are never valid. */
8210 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8211 && SYMBOL_REF_DLLIMPORT_P (x))
8216 if (GET_MODE (x) == TImode
8217 && x != CONST0_RTX (TImode)
8223 if (x == CONST0_RTX (GET_MODE (x)))
8231 /* Otherwise we handle everything else in the move patterns. */
8235 /* Determine if it's legal to put X into the constant pool. This
8236 is not possible for the address of thread-local symbols, which
8237 is checked above. */
8240 ix86_cannot_force_const_mem (rtx x)
8242 /* We can always put integral constants and vectors in memory. */
8243 switch (GET_CODE (x))
8253 return !legitimate_constant_p (x);
8256 /* Determine if a given RTX is a valid constant address. */
8259 constant_address_p (rtx x)
8261 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8264 /* Nonzero if the constant value X is a legitimate general operand
8265 when generating PIC code. It is given that flag_pic is on and
8266 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
8269 legitimate_pic_operand_p (rtx x)
8273 switch (GET_CODE (x))
8276 inner = XEXP (x, 0);
8277 if (GET_CODE (inner) == PLUS
8278 && CONST_INT_P (XEXP (inner, 1)))
8279 inner = XEXP (inner, 0);
8281 /* Only some unspecs are valid as "constants". */
8282 if (GET_CODE (inner) == UNSPEC)
8283 switch (XINT (inner, 1))
8288 return TARGET_64BIT;
8290 x = XVECEXP (inner, 0, 0);
8291 return (GET_CODE (x) == SYMBOL_REF
8292 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8300 return legitimate_pic_address_disp_p (x);
8307 /* Determine if a given CONST RTX is a valid memory displacement
8311 legitimate_pic_address_disp_p (rtx disp)
8315 /* In 64bit mode we can allow direct addresses of symbols and labels
8316 when they are not dynamic symbols. */
8319 rtx op0 = disp, op1;
8321 switch (GET_CODE (disp))
8327 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8329 op0 = XEXP (XEXP (disp, 0), 0);
8330 op1 = XEXP (XEXP (disp, 0), 1);
8331 if (!CONST_INT_P (op1)
8332 || INTVAL (op1) >= 16*1024*1024
8333 || INTVAL (op1) < -16*1024*1024)
8335 if (GET_CODE (op0) == LABEL_REF)
8337 if (GET_CODE (op0) != SYMBOL_REF)
8342 /* TLS references should always be enclosed in UNSPEC. */
8343 if (SYMBOL_REF_TLS_MODEL (op0))
8345 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8346 && ix86_cmodel != CM_LARGE_PIC)
8354 if (GET_CODE (disp) != CONST)
8356 disp = XEXP (disp, 0);
8360 /* We are unsafe to allow PLUS expressions. This limit allowed distance
8361 of GOT tables. We should not need these anyway. */
8362 if (GET_CODE (disp) != UNSPEC
8363 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8364 && XINT (disp, 1) != UNSPEC_GOTOFF
8365 && XINT (disp, 1) != UNSPEC_PLTOFF))
8368 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8369 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
8375 if (GET_CODE (disp) == PLUS)
8377 if (!CONST_INT_P (XEXP (disp, 1)))
8379 disp = XEXP (disp, 0);
8383 if (TARGET_MACHO && darwin_local_data_pic (disp))
8386 if (GET_CODE (disp) != UNSPEC)
8389 switch (XINT (disp, 1))
8394 /* We need to check for both symbols and labels because VxWorks loads
8395 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8397 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8398 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8400 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8401 While ABI specify also 32bit relocation but we don't produce it in
8402 small PIC model at all. */
8403 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8404 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
8406 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
8408 case UNSPEC_GOTTPOFF:
8409 case UNSPEC_GOTNTPOFF:
8410 case UNSPEC_INDNTPOFF:
8413 disp = XVECEXP (disp, 0, 0);
8414 return (GET_CODE (disp) == SYMBOL_REF
8415 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
8417 disp = XVECEXP (disp, 0, 0);
8418 return (GET_CODE (disp) == SYMBOL_REF
8419 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
8421 disp = XVECEXP (disp, 0, 0);
8422 return (GET_CODE (disp) == SYMBOL_REF
8423 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
8429 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
8430 memory address for an instruction. The MODE argument is the machine mode
8431 for the MEM expression that wants to use this address.
8433 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
8434 convert common non-canonical forms to canonical form so that they will
8438 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
8439 rtx addr, int strict)
8441 struct ix86_address parts;
8442 rtx base, index, disp;
8443 HOST_WIDE_INT scale;
8444 const char *reason = NULL;
8445 rtx reason_rtx = NULL_RTX;
8447 if (ix86_decompose_address (addr, &parts) <= 0)
8449 reason = "decomposition failed";
8454 index = parts.index;
8456 scale = parts.scale;
8458 /* Validate base register.
8460 Don't allow SUBREG's that span more than a word here. It can lead to spill
8461 failures when the base is one word out of a two word structure, which is
8462 represented internally as a DImode int. */
8471 else if (GET_CODE (base) == SUBREG
8472 && REG_P (SUBREG_REG (base))
8473 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
8475 reg = SUBREG_REG (base);
8478 reason = "base is not a register";
8482 if (GET_MODE (base) != Pmode)
8484 reason = "base is not in Pmode";
8488 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
8489 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
8491 reason = "base is not valid";
8496 /* Validate index register.
8498 Don't allow SUBREG's that span more than a word here -- same as above. */
8507 else if (GET_CODE (index) == SUBREG
8508 && REG_P (SUBREG_REG (index))
8509 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
8511 reg = SUBREG_REG (index);
8514 reason = "index is not a register";
8518 if (GET_MODE (index) != Pmode)
8520 reason = "index is not in Pmode";
8524 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
8525 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
8527 reason = "index is not valid";
8532 /* Validate scale factor. */
8535 reason_rtx = GEN_INT (scale);
8538 reason = "scale without index";
8542 if (scale != 2 && scale != 4 && scale != 8)
8544 reason = "scale is not a valid multiplier";
8549 /* Validate displacement. */
8554 if (GET_CODE (disp) == CONST
8555 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
8556 switch (XINT (XEXP (disp, 0), 1))
8558 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
8559 used. While ABI specify also 32bit relocations, we don't produce
8560 them at all and use IP relative instead. */
8563 gcc_assert (flag_pic);
8565 goto is_legitimate_pic;
8566 reason = "64bit address unspec";
8569 case UNSPEC_GOTPCREL:
8570 gcc_assert (flag_pic);
8571 goto is_legitimate_pic;
8573 case UNSPEC_GOTTPOFF:
8574 case UNSPEC_GOTNTPOFF:
8575 case UNSPEC_INDNTPOFF:
8581 reason = "invalid address unspec";
8585 else if (SYMBOLIC_CONST (disp)
8589 && MACHOPIC_INDIRECT
8590 && !machopic_operand_p (disp)
8596 if (TARGET_64BIT && (index || base))
8598 /* foo@dtpoff(%rX) is ok. */
8599 if (GET_CODE (disp) != CONST
8600 || GET_CODE (XEXP (disp, 0)) != PLUS
8601 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
8602 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
8603 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
8604 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
8606 reason = "non-constant pic memory reference";
8610 else if (! legitimate_pic_address_disp_p (disp))
8612 reason = "displacement is an invalid pic construct";
8616 /* This code used to verify that a symbolic pic displacement
8617 includes the pic_offset_table_rtx register.
8619 While this is good idea, unfortunately these constructs may
8620 be created by "adds using lea" optimization for incorrect
8629 This code is nonsensical, but results in addressing
8630 GOT table with pic_offset_table_rtx base. We can't
8631 just refuse it easily, since it gets matched by
8632 "addsi3" pattern, that later gets split to lea in the
8633 case output register differs from input. While this
8634 can be handled by separate addsi pattern for this case
8635 that never results in lea, this seems to be easier and
8636 correct fix for crash to disable this test. */
8638 else if (GET_CODE (disp) != LABEL_REF
8639 && !CONST_INT_P (disp)
8640 && (GET_CODE (disp) != CONST
8641 || !legitimate_constant_p (disp))
8642 && (GET_CODE (disp) != SYMBOL_REF
8643 || !legitimate_constant_p (disp)))
8645 reason = "displacement is not constant";
8648 else if (TARGET_64BIT
8649 && !x86_64_immediate_operand (disp, VOIDmode))
8651 reason = "displacement is out of range";
8656 /* Everything looks valid. */
8663 /* Return a unique alias set for the GOT. */
8665 static alias_set_type
8666 ix86_GOT_alias_set (void)
8668 static alias_set_type set = -1;
8670 set = new_alias_set ();
8674 /* Return a legitimate reference for ORIG (an address) using the
8675 register REG. If REG is 0, a new pseudo is generated.
8677 There are two types of references that must be handled:
8679 1. Global data references must load the address from the GOT, via
8680 the PIC reg. An insn is emitted to do this load, and the reg is
8683 2. Static data references, constant pool addresses, and code labels
8684 compute the address as an offset from the GOT, whose base is in
8685 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
8686 differentiate them from global data objects. The returned
8687 address is the PIC reg + an unspec constant.
8689 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
8690 reg also appears in the address. */
8693 legitimize_pic_address (rtx orig, rtx reg)
8700 if (TARGET_MACHO && !TARGET_64BIT)
8703 reg = gen_reg_rtx (Pmode);
8704 /* Use the generic Mach-O PIC machinery. */
8705 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
8709 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
8711 else if (TARGET_64BIT
8712 && ix86_cmodel != CM_SMALL_PIC
8713 && gotoff_operand (addr, Pmode))
8716 /* This symbol may be referenced via a displacement from the PIC
8717 base address (@GOTOFF). */
8719 if (reload_in_progress)
8720 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8721 if (GET_CODE (addr) == CONST)
8722 addr = XEXP (addr, 0);
8723 if (GET_CODE (addr) == PLUS)
8725 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
8727 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
8730 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
8731 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8733 tmpreg = gen_reg_rtx (Pmode);
8736 emit_move_insn (tmpreg, new_rtx);
8740 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
8741 tmpreg, 1, OPTAB_DIRECT);
8744 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
8746 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
8748 /* This symbol may be referenced via a displacement from the PIC
8749 base address (@GOTOFF). */
8751 if (reload_in_progress)
8752 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8753 if (GET_CODE (addr) == CONST)
8754 addr = XEXP (addr, 0);
8755 if (GET_CODE (addr) == PLUS)
8757 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
8759 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
8762 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
8763 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8764 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
8768 emit_move_insn (reg, new_rtx);
8772 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
8773 /* We can't use @GOTOFF for text labels on VxWorks;
8774 see gotoff_operand. */
8775 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
8777 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8779 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
8780 return legitimize_dllimport_symbol (addr, true);
8781 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
8782 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
8783 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
8785 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
8786 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
8790 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
8792 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
8793 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8794 new_rtx = gen_const_mem (Pmode, new_rtx);
8795 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
8798 reg = gen_reg_rtx (Pmode);
8799 /* Use directly gen_movsi, otherwise the address is loaded
8800 into register for CSE. We don't want to CSE this addresses,
8801 instead we CSE addresses from the GOT table, so skip this. */
8802 emit_insn (gen_movsi (reg, new_rtx));
8807 /* This symbol must be referenced via a load from the
8808 Global Offset Table (@GOT). */
8810 if (reload_in_progress)
8811 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8812 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
8813 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8815 new_rtx = force_reg (Pmode, new_rtx);
8816 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
8817 new_rtx = gen_const_mem (Pmode, new_rtx);
8818 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
8821 reg = gen_reg_rtx (Pmode);
8822 emit_move_insn (reg, new_rtx);
8828 if (CONST_INT_P (addr)
8829 && !x86_64_immediate_operand (addr, VOIDmode))
8833 emit_move_insn (reg, addr);
8837 new_rtx = force_reg (Pmode, addr);
8839 else if (GET_CODE (addr) == CONST)
8841 addr = XEXP (addr, 0);
8843 /* We must match stuff we generate before. Assume the only
8844 unspecs that can get here are ours. Not that we could do
8845 anything with them anyway.... */
8846 if (GET_CODE (addr) == UNSPEC
8847 || (GET_CODE (addr) == PLUS
8848 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
8850 gcc_assert (GET_CODE (addr) == PLUS);
8852 if (GET_CODE (addr) == PLUS)
8854 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
8856 /* Check first to see if this is a constant offset from a @GOTOFF
8857 symbol reference. */
8858 if (gotoff_operand (op0, Pmode)
8859 && CONST_INT_P (op1))
8863 if (reload_in_progress)
8864 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8865 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
8867 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
8868 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8869 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
8873 emit_move_insn (reg, new_rtx);
8879 if (INTVAL (op1) < -16*1024*1024
8880 || INTVAL (op1) >= 16*1024*1024)
8882 if (!x86_64_immediate_operand (op1, Pmode))
8883 op1 = force_reg (Pmode, op1);
8884 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
8890 base = legitimize_pic_address (XEXP (addr, 0), reg);
8891 new_rtx = legitimize_pic_address (XEXP (addr, 1),
8892 base == reg ? NULL_RTX : reg);
8894 if (CONST_INT_P (new_rtx))
8895 new_rtx = plus_constant (base, INTVAL (new_rtx));
8898 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
8900 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
8901 new_rtx = XEXP (new_rtx, 1);
8903 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
8911 /* Load the thread pointer. If TO_REG is true, force it into a register. */
8914 get_thread_pointer (int to_reg)
8918 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
8922 reg = gen_reg_rtx (Pmode);
8923 insn = gen_rtx_SET (VOIDmode, reg, tp);
8924 insn = emit_insn (insn);
8929 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
8930 false if we expect this to be used for a memory address and true if
8931 we expect to load the address into a register. */
8934 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
8936 rtx dest, base, off, pic, tp;
8941 case TLS_MODEL_GLOBAL_DYNAMIC:
8942 dest = gen_reg_rtx (Pmode);
8943 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8945 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8947 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
8950 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
8951 insns = get_insns ();
8954 RTL_CONST_CALL_P (insns) = 1;
8955 emit_libcall_block (insns, dest, rax, x);
8957 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8958 emit_insn (gen_tls_global_dynamic_64 (dest, x));
8960 emit_insn (gen_tls_global_dynamic_32 (dest, x));
8962 if (TARGET_GNU2_TLS)
8964 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
8966 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8970 case TLS_MODEL_LOCAL_DYNAMIC:
8971 base = gen_reg_rtx (Pmode);
8972 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8974 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8976 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
8979 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
8980 insns = get_insns ();
8983 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
8984 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
8985 RTL_CONST_CALL_P (insns) = 1;
8986 emit_libcall_block (insns, base, rax, note);
8988 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8989 emit_insn (gen_tls_local_dynamic_base_64 (base));
8991 emit_insn (gen_tls_local_dynamic_base_32 (base));
8993 if (TARGET_GNU2_TLS)
8995 rtx x = ix86_tls_module_base ();
8997 set_unique_reg_note (get_last_insn (), REG_EQUIV,
8998 gen_rtx_MINUS (Pmode, x, tp));
9001 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9002 off = gen_rtx_CONST (Pmode, off);
9004 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9006 if (TARGET_GNU2_TLS)
9008 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9010 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9015 case TLS_MODEL_INITIAL_EXEC:
9019 type = UNSPEC_GOTNTPOFF;
9023 if (reload_in_progress)
9024 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9025 pic = pic_offset_table_rtx;
9026 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9028 else if (!TARGET_ANY_GNU_TLS)
9030 pic = gen_reg_rtx (Pmode);
9031 emit_insn (gen_set_got (pic));
9032 type = UNSPEC_GOTTPOFF;
9037 type = UNSPEC_INDNTPOFF;
9040 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9041 off = gen_rtx_CONST (Pmode, off);
9043 off = gen_rtx_PLUS (Pmode, pic, off);
9044 off = gen_const_mem (Pmode, off);
9045 set_mem_alias_set (off, ix86_GOT_alias_set ());
9047 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9049 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9050 off = force_reg (Pmode, off);
9051 return gen_rtx_PLUS (Pmode, base, off);
9055 base = get_thread_pointer (true);
9056 dest = gen_reg_rtx (Pmode);
9057 emit_insn (gen_subsi3 (dest, base, off));
9061 case TLS_MODEL_LOCAL_EXEC:
9062 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9063 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9064 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9065 off = gen_rtx_CONST (Pmode, off);
9067 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9069 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9070 return gen_rtx_PLUS (Pmode, base, off);
9074 base = get_thread_pointer (true);
9075 dest = gen_reg_rtx (Pmode);
9076 emit_insn (gen_subsi3 (dest, base, off));
9087 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9090 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9091 htab_t dllimport_map;
9094 get_dllimport_decl (tree decl)
9096 struct tree_map *h, in;
9100 size_t namelen, prefixlen;
9106 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9108 in.hash = htab_hash_pointer (decl);
9109 in.base.from = decl;
9110 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9111 h = (struct tree_map *) *loc;
9115 *loc = h = GGC_NEW (struct tree_map);
9117 h->base.from = decl;
9118 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9119 DECL_ARTIFICIAL (to) = 1;
9120 DECL_IGNORED_P (to) = 1;
9121 DECL_EXTERNAL (to) = 1;
9122 TREE_READONLY (to) = 1;
9124 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9125 name = targetm.strip_name_encoding (name);
9126 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9127 ? "*__imp_" : "*__imp__";
9128 namelen = strlen (name);
9129 prefixlen = strlen (prefix);
9130 imp_name = (char *) alloca (namelen + prefixlen + 1);
9131 memcpy (imp_name, prefix, prefixlen);
9132 memcpy (imp_name + prefixlen, name, namelen + 1);
9134 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9135 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9136 SET_SYMBOL_REF_DECL (rtl, to);
9137 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9139 rtl = gen_const_mem (Pmode, rtl);
9140 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9142 SET_DECL_RTL (to, rtl);
9143 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9148 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9149 true if we require the result be a register. */
9152 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9157 gcc_assert (SYMBOL_REF_DECL (symbol));
9158 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9160 x = DECL_RTL (imp_decl);
9162 x = force_reg (Pmode, x);
9166 /* Try machine-dependent ways of modifying an illegitimate address
9167 to be legitimate. If we find one, return the new, valid address.
9168 This macro is used in only one place: `memory_address' in explow.c.
9170 OLDX is the address as it was before break_out_memory_refs was called.
9171 In some cases it is useful to look at this to decide what needs to be done.
9173 MODE and WIN are passed so that this macro can use
9174 GO_IF_LEGITIMATE_ADDRESS.
9176 It is always safe for this macro to do nothing. It exists to recognize
9177 opportunities to optimize the output.
9179 For the 80386, we handle X+REG by loading X into a register R and
9180 using R+REG. R will go in a general reg and indexing will be used.
9181 However, if REG is a broken-out memory address or multiplication,
9182 nothing needs to be done because REG can certainly go in a general reg.
9184 When -fpic is used, special handling is needed for symbolic references.
9185 See comments by legitimize_pic_address in i386.c for details. */
9188 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
9193 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9195 return legitimize_tls_address (x, (enum tls_model) log, false);
9196 if (GET_CODE (x) == CONST
9197 && GET_CODE (XEXP (x, 0)) == PLUS
9198 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9199 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9201 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9202 (enum tls_model) log, false);
9203 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9206 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9208 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9209 return legitimize_dllimport_symbol (x, true);
9210 if (GET_CODE (x) == CONST
9211 && GET_CODE (XEXP (x, 0)) == PLUS
9212 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9213 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9215 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9216 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9220 if (flag_pic && SYMBOLIC_CONST (x))
9221 return legitimize_pic_address (x, 0);
9223 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9224 if (GET_CODE (x) == ASHIFT
9225 && CONST_INT_P (XEXP (x, 1))
9226 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9229 log = INTVAL (XEXP (x, 1));
9230 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9231 GEN_INT (1 << log));
9234 if (GET_CODE (x) == PLUS)
9236 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9238 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9239 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9240 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9243 log = INTVAL (XEXP (XEXP (x, 0), 1));
9244 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9245 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9246 GEN_INT (1 << log));
9249 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9250 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9251 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9254 log = INTVAL (XEXP (XEXP (x, 1), 1));
9255 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9256 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9257 GEN_INT (1 << log));
9260 /* Put multiply first if it isn't already. */
9261 if (GET_CODE (XEXP (x, 1)) == MULT)
9263 rtx tmp = XEXP (x, 0);
9264 XEXP (x, 0) = XEXP (x, 1);
9269 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9270 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9271 created by virtual register instantiation, register elimination, and
9272 similar optimizations. */
9273 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9276 x = gen_rtx_PLUS (Pmode,
9277 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9278 XEXP (XEXP (x, 1), 0)),
9279 XEXP (XEXP (x, 1), 1));
9283 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9284 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9285 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9286 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9287 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9288 && CONSTANT_P (XEXP (x, 1)))
9291 rtx other = NULL_RTX;
9293 if (CONST_INT_P (XEXP (x, 1)))
9295 constant = XEXP (x, 1);
9296 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9298 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9300 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9301 other = XEXP (x, 1);
9309 x = gen_rtx_PLUS (Pmode,
9310 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9311 XEXP (XEXP (XEXP (x, 0), 1), 0)),
9312 plus_constant (other, INTVAL (constant)));
9316 if (changed && legitimate_address_p (mode, x, FALSE))
9319 if (GET_CODE (XEXP (x, 0)) == MULT)
9322 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9325 if (GET_CODE (XEXP (x, 1)) == MULT)
9328 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9332 && REG_P (XEXP (x, 1))
9333 && REG_P (XEXP (x, 0)))
9336 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9339 x = legitimize_pic_address (x, 0);
9342 if (changed && legitimate_address_p (mode, x, FALSE))
9345 if (REG_P (XEXP (x, 0)))
9347 rtx temp = gen_reg_rtx (Pmode);
9348 rtx val = force_operand (XEXP (x, 1), temp);
9350 emit_move_insn (temp, val);
9356 else if (REG_P (XEXP (x, 1)))
9358 rtx temp = gen_reg_rtx (Pmode);
9359 rtx val = force_operand (XEXP (x, 0), temp);
9361 emit_move_insn (temp, val);
9371 /* Print an integer constant expression in assembler syntax. Addition
9372 and subtraction are the only arithmetic that may appear in these
9373 expressions. FILE is the stdio stream to write to, X is the rtx, and
9374 CODE is the operand print code from the output string. */
9377 output_pic_addr_const (FILE *file, rtx x, int code)
9381 switch (GET_CODE (x))
9384 gcc_assert (flag_pic);
9389 if (! TARGET_MACHO || TARGET_64BIT)
9390 output_addr_const (file, x);
9393 const char *name = XSTR (x, 0);
9395 /* Mark the decl as referenced so that cgraph will
9396 output the function. */
9397 if (SYMBOL_REF_DECL (x))
9398 mark_decl_referenced (SYMBOL_REF_DECL (x));
9401 if (MACHOPIC_INDIRECT
9402 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
9403 name = machopic_indirection_name (x, /*stub_p=*/true);
9405 assemble_name (file, name);
9407 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
9408 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
9409 fputs ("@PLT", file);
9416 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
9417 assemble_name (asm_out_file, buf);
9421 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9425 /* This used to output parentheses around the expression,
9426 but that does not work on the 386 (either ATT or BSD assembler). */
9427 output_pic_addr_const (file, XEXP (x, 0), code);
9431 if (GET_MODE (x) == VOIDmode)
9433 /* We can use %d if the number is <32 bits and positive. */
9434 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
9435 fprintf (file, "0x%lx%08lx",
9436 (unsigned long) CONST_DOUBLE_HIGH (x),
9437 (unsigned long) CONST_DOUBLE_LOW (x));
9439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
9442 /* We can't handle floating point constants;
9443 PRINT_OPERAND must handle them. */
9444 output_operand_lossage ("floating constant misused");
9448 /* Some assemblers need integer constants to appear first. */
9449 if (CONST_INT_P (XEXP (x, 0)))
9451 output_pic_addr_const (file, XEXP (x, 0), code);
9453 output_pic_addr_const (file, XEXP (x, 1), code);
9457 gcc_assert (CONST_INT_P (XEXP (x, 1)));
9458 output_pic_addr_const (file, XEXP (x, 1), code);
9460 output_pic_addr_const (file, XEXP (x, 0), code);
9466 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
9467 output_pic_addr_const (file, XEXP (x, 0), code);
9469 output_pic_addr_const (file, XEXP (x, 1), code);
9471 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
9475 gcc_assert (XVECLEN (x, 0) == 1);
9476 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
9477 switch (XINT (x, 1))
9480 fputs ("@GOT", file);
9483 fputs ("@GOTOFF", file);
9486 fputs ("@PLTOFF", file);
9488 case UNSPEC_GOTPCREL:
9489 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9490 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
9492 case UNSPEC_GOTTPOFF:
9493 /* FIXME: This might be @TPOFF in Sun ld too. */
9494 fputs ("@GOTTPOFF", file);
9497 fputs ("@TPOFF", file);
9501 fputs ("@TPOFF", file);
9503 fputs ("@NTPOFF", file);
9506 fputs ("@DTPOFF", file);
9508 case UNSPEC_GOTNTPOFF:
9510 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9511 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
9513 fputs ("@GOTNTPOFF", file);
9515 case UNSPEC_INDNTPOFF:
9516 fputs ("@INDNTPOFF", file);
9519 output_operand_lossage ("invalid UNSPEC as operand");
9525 output_operand_lossage ("invalid expression as operand");
9529 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9530 We need to emit DTP-relative relocations. */
9532 static void ATTRIBUTE_UNUSED
9533 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
9535 fputs (ASM_LONG, file);
9536 output_addr_const (file, x);
9537 fputs ("@DTPOFF", file);
9543 fputs (", 0", file);
9550 /* In the name of slightly smaller debug output, and to cater to
9551 general assembler lossage, recognize PIC+GOTOFF and turn it back
9552 into a direct symbol reference.
9554 On Darwin, this is necessary to avoid a crash, because Darwin
9555 has a different PIC label for each routine but the DWARF debugging
9556 information is not associated with any particular routine, so it's
9557 necessary to remove references to the PIC label from RTL stored by
9558 the DWARF output code. */
9561 ix86_delegitimize_address (rtx orig_x)
9564 /* reg_addend is NULL or a multiple of some register. */
9565 rtx reg_addend = NULL_RTX;
9566 /* const_addend is NULL or a const_int. */
9567 rtx const_addend = NULL_RTX;
9568 /* This is the result, or NULL. */
9569 rtx result = NULL_RTX;
9576 if (GET_CODE (x) != CONST
9577 || GET_CODE (XEXP (x, 0)) != UNSPEC
9578 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
9581 return XVECEXP (XEXP (x, 0), 0, 0);
9584 if (GET_CODE (x) != PLUS
9585 || GET_CODE (XEXP (x, 1)) != CONST)
9588 if (REG_P (XEXP (x, 0))
9589 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
9590 /* %ebx + GOT/GOTOFF */
9592 else if (GET_CODE (XEXP (x, 0)) == PLUS)
9594 /* %ebx + %reg * scale + GOT/GOTOFF */
9595 reg_addend = XEXP (x, 0);
9596 if (REG_P (XEXP (reg_addend, 0))
9597 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
9598 reg_addend = XEXP (reg_addend, 1);
9599 else if (REG_P (XEXP (reg_addend, 1))
9600 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
9601 reg_addend = XEXP (reg_addend, 0);
9604 if (!REG_P (reg_addend)
9605 && GET_CODE (reg_addend) != MULT
9606 && GET_CODE (reg_addend) != ASHIFT)
9612 x = XEXP (XEXP (x, 1), 0);
9613 if (GET_CODE (x) == PLUS
9614 && CONST_INT_P (XEXP (x, 1)))
9616 const_addend = XEXP (x, 1);
9620 if (GET_CODE (x) == UNSPEC
9621 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
9622 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
9623 result = XVECEXP (x, 0, 0);
9625 if (TARGET_MACHO && darwin_local_data_pic (x)
9627 result = XEXP (x, 0);
9633 result = gen_rtx_PLUS (Pmode, result, const_addend);
9635 result = gen_rtx_PLUS (Pmode, reg_addend, result);
9639 /* If X is a machine specific address (i.e. a symbol or label being
9640 referenced as a displacement from the GOT implemented using an
9641 UNSPEC), then return the base term. Otherwise return X. */
9644 ix86_find_base_term (rtx x)
9650 if (GET_CODE (x) != CONST)
9653 if (GET_CODE (term) == PLUS
9654 && (CONST_INT_P (XEXP (term, 1))
9655 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
9656 term = XEXP (term, 0);
9657 if (GET_CODE (term) != UNSPEC
9658 || XINT (term, 1) != UNSPEC_GOTPCREL)
9661 term = XVECEXP (term, 0, 0);
9663 if (GET_CODE (term) != SYMBOL_REF
9664 && GET_CODE (term) != LABEL_REF)
9670 term = ix86_delegitimize_address (x);
9672 if (GET_CODE (term) != SYMBOL_REF
9673 && GET_CODE (term) != LABEL_REF)
9680 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
9685 if (mode == CCFPmode || mode == CCFPUmode)
9687 enum rtx_code second_code, bypass_code;
9688 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
9689 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
9690 code = ix86_fp_compare_code_to_integer (code);
9694 code = reverse_condition (code);
9745 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
9749 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
9750 Those same assemblers have the same but opposite lossage on cmov. */
9752 suffix = fp ? "nbe" : "a";
9753 else if (mode == CCCmode)
9776 gcc_assert (mode == CCmode || mode == CCCmode);
9798 gcc_assert (mode == CCmode || mode == CCCmode);
9799 suffix = fp ? "nb" : "ae";
9802 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
9809 else if (mode == CCCmode)
9810 suffix = fp ? "nb" : "ae";
9815 suffix = fp ? "u" : "p";
9818 suffix = fp ? "nu" : "np";
9823 fputs (suffix, file);
9826 /* Print the name of register X to FILE based on its machine mode and number.
9827 If CODE is 'w', pretend the mode is HImode.
9828 If CODE is 'b', pretend the mode is QImode.
9829 If CODE is 'k', pretend the mode is SImode.
9830 If CODE is 'q', pretend the mode is DImode.
9831 If CODE is 'h', pretend the reg is the 'high' byte register.
9832 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
9835 print_reg (rtx x, int code, FILE *file)
9837 gcc_assert (x == pc_rtx
9838 || (REGNO (x) != ARG_POINTER_REGNUM
9839 && REGNO (x) != FRAME_POINTER_REGNUM
9840 && REGNO (x) != FLAGS_REG
9841 && REGNO (x) != FPSR_REG
9842 && REGNO (x) != FPCR_REG));
9844 if (ASSEMBLER_DIALECT == ASM_ATT)
9849 gcc_assert (TARGET_64BIT);
9850 fputs ("rip", file);
9854 if (code == 'w' || MMX_REG_P (x))
9856 else if (code == 'b')
9858 else if (code == 'k')
9860 else if (code == 'q')
9862 else if (code == 'y')
9864 else if (code == 'h')
9867 code = GET_MODE_SIZE (GET_MODE (x));
9869 /* Irritatingly, AMD extended registers use different naming convention
9870 from the normal registers. */
9871 if (REX_INT_REG_P (x))
9873 gcc_assert (TARGET_64BIT);
9877 error ("extended registers have no high halves");
9880 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
9883 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
9886 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
9889 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
9892 error ("unsupported operand size for extended register");
9900 if (STACK_TOP_P (x))
9902 fputs ("st(0)", file);
9909 if (! ANY_FP_REG_P (x))
9910 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
9915 fputs (hi_reg_name[REGNO (x)], file);
9918 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
9920 fputs (qi_reg_name[REGNO (x)], file);
9923 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
9925 fputs (qi_high_reg_name[REGNO (x)], file);
9932 /* Locate some local-dynamic symbol still in use by this function
9933 so that we can print its name in some tls_local_dynamic_base
9937 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
9941 if (GET_CODE (x) == SYMBOL_REF
9942 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9944 cfun->machine->some_ld_name = XSTR (x, 0);
9952 get_some_local_dynamic_name (void)
9956 if (cfun->machine->some_ld_name)
9957 return cfun->machine->some_ld_name;
9959 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9961 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9962 return cfun->machine->some_ld_name;
9968 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
9969 C -- print opcode suffix for set/cmov insn.
9970 c -- like C, but print reversed condition
9971 E,e -- likewise, but for compare-and-branch fused insn.
9972 F,f -- likewise, but for floating-point.
9973 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
9975 R -- print the prefix for register names.
9976 z -- print the opcode suffix for the size of the current operand.
9977 * -- print a star (in certain assembler syntax)
9978 A -- print an absolute memory reference.
9979 w -- print the operand as if it's a "word" (HImode) even if it isn't.
9980 s -- print a shift double count, followed by the assemblers argument
9982 b -- print the QImode name of the register for the indicated operand.
9983 %b0 would print %al if operands[0] is reg 0.
9984 w -- likewise, print the HImode name of the register.
9985 k -- likewise, print the SImode name of the register.
9986 q -- likewise, print the DImode name of the register.
9987 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
9988 y -- print "st(0)" instead of "st" as a register.
9989 D -- print condition for SSE cmp instruction.
9990 P -- if PIC, print an @PLT suffix.
9991 X -- don't print any sort of PIC '@' suffix for a symbol.
9992 & -- print some in-use local-dynamic symbol name.
9993 H -- print a memory address offset by 8; used for sse high-parts
9994 Y -- print condition for SSE5 com* instruction.
9995 + -- print a branch hint as 'cs' or 'ds' prefix
9996 ; -- print a semicolon (after prefixes due to bug in older gas).
10000 print_operand (FILE *file, rtx x, int code)
10007 if (ASSEMBLER_DIALECT == ASM_ATT)
10012 assemble_name (file, get_some_local_dynamic_name ());
10016 switch (ASSEMBLER_DIALECT)
10023 /* Intel syntax. For absolute addresses, registers should not
10024 be surrounded by braces. */
10028 PRINT_OPERAND (file, x, 0);
10035 gcc_unreachable ();
10038 PRINT_OPERAND (file, x, 0);
10043 if (ASSEMBLER_DIALECT == ASM_ATT)
10048 if (ASSEMBLER_DIALECT == ASM_ATT)
10053 if (ASSEMBLER_DIALECT == ASM_ATT)
10058 if (ASSEMBLER_DIALECT == ASM_ATT)
10063 if (ASSEMBLER_DIALECT == ASM_ATT)
10068 if (ASSEMBLER_DIALECT == ASM_ATT)
10073 /* 387 opcodes don't get size suffixes if the operands are
10075 if (STACK_REG_P (x))
10078 /* Likewise if using Intel opcodes. */
10079 if (ASSEMBLER_DIALECT == ASM_INTEL)
10082 /* This is the size of op from size of operand. */
10083 switch (GET_MODE_SIZE (GET_MODE (x)))
10092 #ifdef HAVE_GAS_FILDS_FISTS
10102 if (GET_MODE (x) == SFmode)
10117 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10121 #ifdef GAS_MNEMONICS
10136 gcc_unreachable ();
10150 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10152 PRINT_OPERAND (file, x, 0);
10153 fputs (", ", file);
10158 /* Little bit of braindamage here. The SSE compare instructions
10159 does use completely different names for the comparisons that the
10160 fp conditional moves. */
10161 switch (GET_CODE (x))
10165 fputs ("eq", file);
10169 fputs ("lt", file);
10173 fputs ("le", file);
10176 fputs ("unord", file);
10180 fputs ("neq", file);
10184 fputs ("nlt", file);
10188 fputs ("nle", file);
10191 fputs ("ord", file);
10194 gcc_unreachable ();
10198 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10199 if (ASSEMBLER_DIALECT == ASM_ATT)
10201 switch (GET_MODE (x))
10203 case HImode: putc ('w', file); break;
10205 case SFmode: putc ('l', file); break;
10207 case DFmode: putc ('q', file); break;
10208 default: gcc_unreachable ();
10215 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10218 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10219 if (ASSEMBLER_DIALECT == ASM_ATT)
10222 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10225 /* Like above, but reverse condition */
10227 /* Check to see if argument to %c is really a constant
10228 and not a condition code which needs to be reversed. */
10229 if (!COMPARISON_P (x))
10231 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
10234 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10237 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10238 if (ASSEMBLER_DIALECT == ASM_ATT)
10241 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
10245 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10249 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
10253 /* It doesn't actually matter what mode we use here, as we're
10254 only going to use this for printing. */
10255 x = adjust_address_nv (x, DImode, 8);
10262 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
10265 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10268 int pred_val = INTVAL (XEXP (x, 0));
10270 if (pred_val < REG_BR_PROB_BASE * 45 / 100
10271 || pred_val > REG_BR_PROB_BASE * 55 / 100)
10273 int taken = pred_val > REG_BR_PROB_BASE / 2;
10274 int cputaken = final_forward_branch_p (current_output_insn) == 0;
10276 /* Emit hints only in the case default branch prediction
10277 heuristics would fail. */
10278 if (taken != cputaken)
10280 /* We use 3e (DS) prefix for taken branches and
10281 2e (CS) prefix for not taken branches. */
10283 fputs ("ds ; ", file);
10285 fputs ("cs ; ", file);
10293 switch (GET_CODE (x))
10296 fputs ("neq", file);
10299 fputs ("eq", file);
10303 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
10307 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
10311 fputs ("le", file);
10315 fputs ("lt", file);
10318 fputs ("unord", file);
10321 fputs ("ord", file);
10324 fputs ("ueq", file);
10327 fputs ("nlt", file);
10330 fputs ("nle", file);
10333 fputs ("ule", file);
10336 fputs ("ult", file);
10339 fputs ("une", file);
10342 gcc_unreachable ();
10348 fputs (" ; ", file);
10355 output_operand_lossage ("invalid operand code '%c'", code);
10360 print_reg (x, code, file);
10362 else if (MEM_P (x))
10364 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
10365 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
10366 && GET_MODE (x) != BLKmode)
10369 switch (GET_MODE_SIZE (GET_MODE (x)))
10371 case 1: size = "BYTE"; break;
10372 case 2: size = "WORD"; break;
10373 case 4: size = "DWORD"; break;
10374 case 8: size = "QWORD"; break;
10375 case 12: size = "XWORD"; break;
10377 if (GET_MODE (x) == XFmode)
10383 gcc_unreachable ();
10386 /* Check for explicit size override (codes 'b', 'w' and 'k') */
10389 else if (code == 'w')
10391 else if (code == 'k')
10394 fputs (size, file);
10395 fputs (" PTR ", file);
10399 /* Avoid (%rip) for call operands. */
10400 if (CONSTANT_ADDRESS_P (x) && code == 'P'
10401 && !CONST_INT_P (x))
10402 output_addr_const (file, x);
10403 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
10404 output_operand_lossage ("invalid constraints for operand");
10406 output_address (x);
10409 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
10414 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10415 REAL_VALUE_TO_TARGET_SINGLE (r, l);
10417 if (ASSEMBLER_DIALECT == ASM_ATT)
10419 fprintf (file, "0x%08lx", (long unsigned int) l);
10422 /* These float cases don't actually occur as immediate operands. */
10423 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
10427 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10428 fprintf (file, "%s", dstr);
10431 else if (GET_CODE (x) == CONST_DOUBLE
10432 && GET_MODE (x) == XFmode)
10436 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10437 fprintf (file, "%s", dstr);
10442 /* We have patterns that allow zero sets of memory, for instance.
10443 In 64-bit mode, we should probably support all 8-byte vectors,
10444 since we can in fact encode that into an immediate. */
10445 if (GET_CODE (x) == CONST_VECTOR)
10447 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
10453 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
10455 if (ASSEMBLER_DIALECT == ASM_ATT)
10458 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
10459 || GET_CODE (x) == LABEL_REF)
10461 if (ASSEMBLER_DIALECT == ASM_ATT)
10464 fputs ("OFFSET FLAT:", file);
10467 if (CONST_INT_P (x))
10468 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10470 output_pic_addr_const (file, x, code);
10472 output_addr_const (file, x);
10476 /* Print a memory operand whose address is ADDR. */
10479 print_operand_address (FILE *file, rtx addr)
10481 struct ix86_address parts;
10482 rtx base, index, disp;
10484 int ok = ix86_decompose_address (addr, &parts);
10489 index = parts.index;
10491 scale = parts.scale;
10499 if (ASSEMBLER_DIALECT == ASM_ATT)
10501 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
10504 gcc_unreachable ();
10507 /* Use one byte shorter RIP relative addressing for 64bit mode. */
10508 if (TARGET_64BIT && !base && !index)
10512 if (GET_CODE (disp) == CONST
10513 && GET_CODE (XEXP (disp, 0)) == PLUS
10514 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
10515 symbol = XEXP (XEXP (disp, 0), 0);
10517 if (GET_CODE (symbol) == LABEL_REF
10518 || (GET_CODE (symbol) == SYMBOL_REF
10519 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
10522 if (!base && !index)
10524 /* Displacement only requires special attention. */
10526 if (CONST_INT_P (disp))
10528 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
10529 fputs ("ds:", file);
10530 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
10533 output_pic_addr_const (file, disp, 0);
10535 output_addr_const (file, disp);
10539 if (ASSEMBLER_DIALECT == ASM_ATT)
10544 output_pic_addr_const (file, disp, 0);
10545 else if (GET_CODE (disp) == LABEL_REF)
10546 output_asm_label (disp);
10548 output_addr_const (file, disp);
10553 print_reg (base, 0, file);
10557 print_reg (index, 0, file);
10559 fprintf (file, ",%d", scale);
10565 rtx offset = NULL_RTX;
10569 /* Pull out the offset of a symbol; print any symbol itself. */
10570 if (GET_CODE (disp) == CONST
10571 && GET_CODE (XEXP (disp, 0)) == PLUS
10572 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
10574 offset = XEXP (XEXP (disp, 0), 1);
10575 disp = gen_rtx_CONST (VOIDmode,
10576 XEXP (XEXP (disp, 0), 0));
10580 output_pic_addr_const (file, disp, 0);
10581 else if (GET_CODE (disp) == LABEL_REF)
10582 output_asm_label (disp);
10583 else if (CONST_INT_P (disp))
10586 output_addr_const (file, disp);
10592 print_reg (base, 0, file);
10595 if (INTVAL (offset) >= 0)
10597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
10601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
10608 print_reg (index, 0, file);
10610 fprintf (file, "*%d", scale);
10618 output_addr_const_extra (FILE *file, rtx x)
10622 if (GET_CODE (x) != UNSPEC)
10625 op = XVECEXP (x, 0, 0);
10626 switch (XINT (x, 1))
10628 case UNSPEC_GOTTPOFF:
10629 output_addr_const (file, op);
10630 /* FIXME: This might be @TPOFF in Sun ld. */
10631 fputs ("@GOTTPOFF", file);
10634 output_addr_const (file, op);
10635 fputs ("@TPOFF", file);
10637 case UNSPEC_NTPOFF:
10638 output_addr_const (file, op);
10640 fputs ("@TPOFF", file);
10642 fputs ("@NTPOFF", file);
10644 case UNSPEC_DTPOFF:
10645 output_addr_const (file, op);
10646 fputs ("@DTPOFF", file);
10648 case UNSPEC_GOTNTPOFF:
10649 output_addr_const (file, op);
10651 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10652 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
10654 fputs ("@GOTNTPOFF", file);
10656 case UNSPEC_INDNTPOFF:
10657 output_addr_const (file, op);
10658 fputs ("@INDNTPOFF", file);
10668 /* Split one or more DImode RTL references into pairs of SImode
10669 references. The RTL can be REG, offsettable MEM, integer constant, or
10670 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
10671 split and "num" is its length. lo_half and hi_half are output arrays
10672 that parallel "operands". */
10675 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
10679 rtx op = operands[num];
10681 /* simplify_subreg refuse to split volatile memory addresses,
10682 but we still have to handle it. */
10685 lo_half[num] = adjust_address (op, SImode, 0);
10686 hi_half[num] = adjust_address (op, SImode, 4);
10690 lo_half[num] = simplify_gen_subreg (SImode, op,
10691 GET_MODE (op) == VOIDmode
10692 ? DImode : GET_MODE (op), 0);
10693 hi_half[num] = simplify_gen_subreg (SImode, op,
10694 GET_MODE (op) == VOIDmode
10695 ? DImode : GET_MODE (op), 4);
10699 /* Split one or more TImode RTL references into pairs of DImode
10700 references. The RTL can be REG, offsettable MEM, integer constant, or
10701 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
10702 split and "num" is its length. lo_half and hi_half are output arrays
10703 that parallel "operands". */
10706 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
10710 rtx op = operands[num];
10712 /* simplify_subreg refuse to split volatile memory addresses, but we
10713 still have to handle it. */
10716 lo_half[num] = adjust_address (op, DImode, 0);
10717 hi_half[num] = adjust_address (op, DImode, 8);
10721 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
10722 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
10727 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
10728 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
10729 is the expression of the binary operation. The output may either be
10730 emitted here, or returned to the caller, like all output_* functions.
10732 There is no guarantee that the operands are the same mode, as they
10733 might be within FLOAT or FLOAT_EXTEND expressions. */
10735 #ifndef SYSV386_COMPAT
10736 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
10737 wants to fix the assemblers because that causes incompatibility
10738 with gcc. No-one wants to fix gcc because that causes
10739 incompatibility with assemblers... You can use the option of
10740 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
10741 #define SYSV386_COMPAT 1
10745 output_387_binary_op (rtx insn, rtx *operands)
10747 static char buf[30];
10750 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
10752 #ifdef ENABLE_CHECKING
10753 /* Even if we do not want to check the inputs, this documents input
10754 constraints. Which helps in understanding the following code. */
10755 if (STACK_REG_P (operands[0])
10756 && ((REG_P (operands[1])
10757 && REGNO (operands[0]) == REGNO (operands[1])
10758 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
10759 || (REG_P (operands[2])
10760 && REGNO (operands[0]) == REGNO (operands[2])
10761 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
10762 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
10765 gcc_assert (is_sse);
10768 switch (GET_CODE (operands[3]))
10771 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10772 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10780 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10781 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10789 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10790 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10798 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10799 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10807 gcc_unreachable ();
10812 strcpy (buf, ssep);
10813 if (GET_MODE (operands[0]) == SFmode)
10814 strcat (buf, "ss\t{%2, %0|%0, %2}");
10816 strcat (buf, "sd\t{%2, %0|%0, %2}");
10821 switch (GET_CODE (operands[3]))
10825 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
10827 rtx temp = operands[2];
10828 operands[2] = operands[1];
10829 operands[1] = temp;
10832 /* know operands[0] == operands[1]. */
10834 if (MEM_P (operands[2]))
10840 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
10842 if (STACK_TOP_P (operands[0]))
10843 /* How is it that we are storing to a dead operand[2]?
10844 Well, presumably operands[1] is dead too. We can't
10845 store the result to st(0) as st(0) gets popped on this
10846 instruction. Instead store to operands[2] (which I
10847 think has to be st(1)). st(1) will be popped later.
10848 gcc <= 2.8.1 didn't have this check and generated
10849 assembly code that the Unixware assembler rejected. */
10850 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
10852 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
10856 if (STACK_TOP_P (operands[0]))
10857 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
10859 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
10864 if (MEM_P (operands[1]))
10870 if (MEM_P (operands[2]))
10876 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
10879 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
10880 derived assemblers, confusingly reverse the direction of
10881 the operation for fsub{r} and fdiv{r} when the
10882 destination register is not st(0). The Intel assembler
10883 doesn't have this brain damage. Read !SYSV386_COMPAT to
10884 figure out what the hardware really does. */
10885 if (STACK_TOP_P (operands[0]))
10886 p = "{p\t%0, %2|rp\t%2, %0}";
10888 p = "{rp\t%2, %0|p\t%0, %2}";
10890 if (STACK_TOP_P (operands[0]))
10891 /* As above for fmul/fadd, we can't store to st(0). */
10892 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
10894 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
10899 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
10902 if (STACK_TOP_P (operands[0]))
10903 p = "{rp\t%0, %1|p\t%1, %0}";
10905 p = "{p\t%1, %0|rp\t%0, %1}";
10907 if (STACK_TOP_P (operands[0]))
10908 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
10910 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
10915 if (STACK_TOP_P (operands[0]))
10917 if (STACK_TOP_P (operands[1]))
10918 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
10920 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
10923 else if (STACK_TOP_P (operands[1]))
10926 p = "{\t%1, %0|r\t%0, %1}";
10928 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
10934 p = "{r\t%2, %0|\t%0, %2}";
10936 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
10942 gcc_unreachable ();
10949 /* Return needed mode for entity in optimize_mode_switching pass. */
10952 ix86_mode_needed (int entity, rtx insn)
10954 enum attr_i387_cw mode;
10956 /* The mode UNINITIALIZED is used to store control word after a
10957 function call or ASM pattern. The mode ANY specify that function
10958 has no requirements on the control word and make no changes in the
10959 bits we are interested in. */
10962 || (NONJUMP_INSN_P (insn)
10963 && (asm_noperands (PATTERN (insn)) >= 0
10964 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
10965 return I387_CW_UNINITIALIZED;
10967 if (recog_memoized (insn) < 0)
10968 return I387_CW_ANY;
10970 mode = get_attr_i387_cw (insn);
10975 if (mode == I387_CW_TRUNC)
10980 if (mode == I387_CW_FLOOR)
10985 if (mode == I387_CW_CEIL)
10990 if (mode == I387_CW_MASK_PM)
10995 gcc_unreachable ();
10998 return I387_CW_ANY;
11001 /* Output code to initialize control word copies used by trunc?f?i and
11002 rounding patterns. CURRENT_MODE is set to current control word,
11003 while NEW_MODE is set to new control word. */
11006 emit_i387_cw_initialization (int mode)
11008 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11011 enum ix86_stack_slot slot;
11013 rtx reg = gen_reg_rtx (HImode);
11015 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11016 emit_move_insn (reg, copy_rtx (stored_mode));
11018 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
11022 case I387_CW_TRUNC:
11023 /* round toward zero (truncate) */
11024 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11025 slot = SLOT_CW_TRUNC;
11028 case I387_CW_FLOOR:
11029 /* round down toward -oo */
11030 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11031 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11032 slot = SLOT_CW_FLOOR;
11036 /* round up toward +oo */
11037 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11038 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11039 slot = SLOT_CW_CEIL;
11042 case I387_CW_MASK_PM:
11043 /* mask precision exception for nearbyint() */
11044 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11045 slot = SLOT_CW_MASK_PM;
11049 gcc_unreachable ();
11056 case I387_CW_TRUNC:
11057 /* round toward zero (truncate) */
11058 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11059 slot = SLOT_CW_TRUNC;
11062 case I387_CW_FLOOR:
11063 /* round down toward -oo */
11064 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11065 slot = SLOT_CW_FLOOR;
11069 /* round up toward +oo */
11070 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11071 slot = SLOT_CW_CEIL;
11074 case I387_CW_MASK_PM:
11075 /* mask precision exception for nearbyint() */
11076 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11077 slot = SLOT_CW_MASK_PM;
11081 gcc_unreachable ();
11085 gcc_assert (slot < MAX_386_STACK_LOCALS);
11087 new_mode = assign_386_stack_local (HImode, slot);
11088 emit_move_insn (new_mode, reg);
11091 /* Output code for INSN to convert a float to a signed int. OPERANDS
11092 are the insn operands. The output may be [HSD]Imode and the input
11093 operand may be [SDX]Fmode. */
11096 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11098 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11099 int dimode_p = GET_MODE (operands[0]) == DImode;
11100 int round_mode = get_attr_i387_cw (insn);
11102 /* Jump through a hoop or two for DImode, since the hardware has no
11103 non-popping instruction. We used to do this a different way, but
11104 that was somewhat fragile and broke with post-reload splitters. */
11105 if ((dimode_p || fisttp) && !stack_top_dies)
11106 output_asm_insn ("fld\t%y1", operands);
11108 gcc_assert (STACK_TOP_P (operands[1]));
11109 gcc_assert (MEM_P (operands[0]));
11110 gcc_assert (GET_MODE (operands[1]) != TFmode);
11113 output_asm_insn ("fisttp%z0\t%0", operands);
11116 if (round_mode != I387_CW_ANY)
11117 output_asm_insn ("fldcw\t%3", operands);
11118 if (stack_top_dies || dimode_p)
11119 output_asm_insn ("fistp%z0\t%0", operands);
11121 output_asm_insn ("fist%z0\t%0", operands);
11122 if (round_mode != I387_CW_ANY)
11123 output_asm_insn ("fldcw\t%2", operands);
11129 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11130 have the values zero or one, indicates the ffreep insn's operand
11131 from the OPERANDS array. */
11133 static const char *
11134 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11136 if (TARGET_USE_FFREEP)
11137 #if HAVE_AS_IX86_FFREEP
11138 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
11141 static char retval[] = ".word\t0xc_df";
11142 int regno = REGNO (operands[opno]);
11144 gcc_assert (FP_REGNO_P (regno));
11146 retval[9] = '0' + (regno - FIRST_STACK_REG);
11151 return opno ? "fstp\t%y1" : "fstp\t%y0";
11155 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11156 should be used. UNORDERED_P is true when fucom should be used. */
11159 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11161 int stack_top_dies;
11162 rtx cmp_op0, cmp_op1;
11163 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
11167 cmp_op0 = operands[0];
11168 cmp_op1 = operands[1];
11172 cmp_op0 = operands[1];
11173 cmp_op1 = operands[2];
11178 if (GET_MODE (operands[0]) == SFmode)
11180 return "ucomiss\t{%1, %0|%0, %1}";
11182 return "comiss\t{%1, %0|%0, %1}";
11185 return "ucomisd\t{%1, %0|%0, %1}";
11187 return "comisd\t{%1, %0|%0, %1}";
11190 gcc_assert (STACK_TOP_P (cmp_op0));
11192 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11194 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11196 if (stack_top_dies)
11198 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11199 return output_387_ffreep (operands, 1);
11202 return "ftst\n\tfnstsw\t%0";
11205 if (STACK_REG_P (cmp_op1)
11207 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11208 && REGNO (cmp_op1) != FIRST_STACK_REG)
11210 /* If both the top of the 387 stack dies, and the other operand
11211 is also a stack register that dies, then this must be a
11212 `fcompp' float compare */
11216 /* There is no double popping fcomi variant. Fortunately,
11217 eflags is immune from the fstp's cc clobbering. */
11219 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11221 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11222 return output_387_ffreep (operands, 0);
11227 return "fucompp\n\tfnstsw\t%0";
11229 return "fcompp\n\tfnstsw\t%0";
11234 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
11236 static const char * const alt[16] =
11238 "fcom%z2\t%y2\n\tfnstsw\t%0",
11239 "fcomp%z2\t%y2\n\tfnstsw\t%0",
11240 "fucom%z2\t%y2\n\tfnstsw\t%0",
11241 "fucomp%z2\t%y2\n\tfnstsw\t%0",
11243 "ficom%z2\t%y2\n\tfnstsw\t%0",
11244 "ficomp%z2\t%y2\n\tfnstsw\t%0",
11248 "fcomi\t{%y1, %0|%0, %y1}",
11249 "fcomip\t{%y1, %0|%0, %y1}",
11250 "fucomi\t{%y1, %0|%0, %y1}",
11251 "fucomip\t{%y1, %0|%0, %y1}",
11262 mask = eflags_p << 3;
11263 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
11264 mask |= unordered_p << 1;
11265 mask |= stack_top_dies;
11267 gcc_assert (mask < 16);
11276 ix86_output_addr_vec_elt (FILE *file, int value)
11278 const char *directive = ASM_LONG;
11282 directive = ASM_QUAD;
11284 gcc_assert (!TARGET_64BIT);
11287 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
11291 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
11293 const char *directive = ASM_LONG;
11296 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
11297 directive = ASM_QUAD;
11299 gcc_assert (!TARGET_64BIT);
11301 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
11302 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
11303 fprintf (file, "%s%s%d-%s%d\n",
11304 directive, LPREFIX, value, LPREFIX, rel);
11305 else if (HAVE_AS_GOTOFF_IN_DATA)
11306 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
11308 else if (TARGET_MACHO)
11310 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
11311 machopic_output_function_base_name (file);
11312 fprintf(file, "\n");
11316 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
11317 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
11320 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
11324 ix86_expand_clear (rtx dest)
11328 /* We play register width games, which are only valid after reload. */
11329 gcc_assert (reload_completed);
11331 /* Avoid HImode and its attendant prefix byte. */
11332 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
11333 dest = gen_rtx_REG (SImode, REGNO (dest));
11334 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
11336 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
11337 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
11339 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11340 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
11346 /* X is an unchanging MEM. If it is a constant pool reference, return
11347 the constant pool rtx, else NULL. */
11350 maybe_get_pool_constant (rtx x)
11352 x = ix86_delegitimize_address (XEXP (x, 0));
11354 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
11355 return get_pool_constant (x);
11361 ix86_expand_move (enum machine_mode mode, rtx operands[])
11364 enum tls_model model;
11369 if (GET_CODE (op1) == SYMBOL_REF)
11371 model = SYMBOL_REF_TLS_MODEL (op1);
11374 op1 = legitimize_tls_address (op1, model, true);
11375 op1 = force_operand (op1, op0);
11379 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11380 && SYMBOL_REF_DLLIMPORT_P (op1))
11381 op1 = legitimize_dllimport_symbol (op1, false);
11383 else if (GET_CODE (op1) == CONST
11384 && GET_CODE (XEXP (op1, 0)) == PLUS
11385 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
11387 rtx addend = XEXP (XEXP (op1, 0), 1);
11388 rtx symbol = XEXP (XEXP (op1, 0), 0);
11391 model = SYMBOL_REF_TLS_MODEL (symbol);
11393 tmp = legitimize_tls_address (symbol, model, true);
11394 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11395 && SYMBOL_REF_DLLIMPORT_P (symbol))
11396 tmp = legitimize_dllimport_symbol (symbol, true);
11400 tmp = force_operand (tmp, NULL);
11401 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
11402 op0, 1, OPTAB_DIRECT);
11408 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
11410 if (TARGET_MACHO && !TARGET_64BIT)
11415 rtx temp = ((reload_in_progress
11416 || ((op0 && REG_P (op0))
11418 ? op0 : gen_reg_rtx (Pmode));
11419 op1 = machopic_indirect_data_reference (op1, temp);
11420 op1 = machopic_legitimize_pic_address (op1, mode,
11421 temp == op1 ? 0 : temp);
11423 else if (MACHOPIC_INDIRECT)
11424 op1 = machopic_indirect_data_reference (op1, 0);
11432 op1 = force_reg (Pmode, op1);
11433 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
11435 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
11436 op1 = legitimize_pic_address (op1, reg);
11445 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
11446 || !push_operand (op0, mode))
11448 op1 = force_reg (mode, op1);
11450 if (push_operand (op0, mode)
11451 && ! general_no_elim_operand (op1, mode))
11452 op1 = copy_to_mode_reg (mode, op1);
11454 /* Force large constants in 64bit compilation into register
11455 to get them CSEed. */
11456 if (can_create_pseudo_p ()
11457 && (mode == DImode) && TARGET_64BIT
11458 && immediate_operand (op1, mode)
11459 && !x86_64_zext_immediate_operand (op1, VOIDmode)
11460 && !register_operand (op0, mode)
11462 op1 = copy_to_mode_reg (mode, op1);
11464 if (can_create_pseudo_p ()
11465 && FLOAT_MODE_P (mode)
11466 && GET_CODE (op1) == CONST_DOUBLE)
11468 /* If we are loading a floating point constant to a register,
11469 force the value to memory now, since we'll get better code
11470 out the back end. */
11472 op1 = validize_mem (force_const_mem (mode, op1));
11473 if (!register_operand (op0, mode))
11475 rtx temp = gen_reg_rtx (mode);
11476 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
11477 emit_move_insn (op0, temp);
11483 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
11487 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
11489 rtx op0 = operands[0], op1 = operands[1];
11490 unsigned int align = GET_MODE_ALIGNMENT (mode);
11492 /* Force constants other than zero into memory. We do not know how
11493 the instructions used to build constants modify the upper 64 bits
11494 of the register, once we have that information we may be able
11495 to handle some of them more efficiently. */
11496 if (can_create_pseudo_p ()
11497 && register_operand (op0, mode)
11498 && (CONSTANT_P (op1)
11499 || (GET_CODE (op1) == SUBREG
11500 && CONSTANT_P (SUBREG_REG (op1))))
11501 && standard_sse_constant_p (op1) <= 0)
11502 op1 = validize_mem (force_const_mem (mode, op1));
11504 /* We need to check memory alignment for SSE mode since attribute
11505 can make operands unaligned. */
11506 if (can_create_pseudo_p ()
11507 && SSE_REG_MODE_P (mode)
11508 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
11509 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
11513 /* ix86_expand_vector_move_misalign() does not like constants ... */
11514 if (CONSTANT_P (op1)
11515 || (GET_CODE (op1) == SUBREG
11516 && CONSTANT_P (SUBREG_REG (op1))))
11517 op1 = validize_mem (force_const_mem (mode, op1));
11519 /* ... nor both arguments in memory. */
11520 if (!register_operand (op0, mode)
11521 && !register_operand (op1, mode))
11522 op1 = force_reg (mode, op1);
11524 tmp[0] = op0; tmp[1] = op1;
11525 ix86_expand_vector_move_misalign (mode, tmp);
11529 /* Make operand1 a register if it isn't already. */
11530 if (can_create_pseudo_p ()
11531 && !register_operand (op0, mode)
11532 && !register_operand (op1, mode))
11534 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
11538 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
11541 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
11542 straight to ix86_expand_vector_move. */
11543 /* Code generation for scalar reg-reg moves of single and double precision data:
11544 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
11548 if (x86_sse_partial_reg_dependency == true)
11553 Code generation for scalar loads of double precision data:
11554 if (x86_sse_split_regs == true)
11555 movlpd mem, reg (gas syntax)
11559 Code generation for unaligned packed loads of single precision data
11560 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
11561 if (x86_sse_unaligned_move_optimal)
11564 if (x86_sse_partial_reg_dependency == true)
11576 Code generation for unaligned packed loads of double precision data
11577 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
11578 if (x86_sse_unaligned_move_optimal)
11581 if (x86_sse_split_regs == true)
11594 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
11603 /* If we're optimizing for size, movups is the smallest. */
11606 op0 = gen_lowpart (V4SFmode, op0);
11607 op1 = gen_lowpart (V4SFmode, op1);
11608 emit_insn (gen_sse_movups (op0, op1));
11612 /* ??? If we have typed data, then it would appear that using
11613 movdqu is the only way to get unaligned data loaded with
11615 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11617 op0 = gen_lowpart (V16QImode, op0);
11618 op1 = gen_lowpart (V16QImode, op1);
11619 emit_insn (gen_sse2_movdqu (op0, op1));
11623 if (TARGET_SSE2 && mode == V2DFmode)
11627 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
11629 op0 = gen_lowpart (V2DFmode, op0);
11630 op1 = gen_lowpart (V2DFmode, op1);
11631 emit_insn (gen_sse2_movupd (op0, op1));
11635 /* When SSE registers are split into halves, we can avoid
11636 writing to the top half twice. */
11637 if (TARGET_SSE_SPLIT_REGS)
11639 emit_clobber (op0);
11644 /* ??? Not sure about the best option for the Intel chips.
11645 The following would seem to satisfy; the register is
11646 entirely cleared, breaking the dependency chain. We
11647 then store to the upper half, with a dependency depth
11648 of one. A rumor has it that Intel recommends two movsd
11649 followed by an unpacklpd, but this is unconfirmed. And
11650 given that the dependency depth of the unpacklpd would
11651 still be one, I'm not sure why this would be better. */
11652 zero = CONST0_RTX (V2DFmode);
11655 m = adjust_address (op1, DFmode, 0);
11656 emit_insn (gen_sse2_loadlpd (op0, zero, m));
11657 m = adjust_address (op1, DFmode, 8);
11658 emit_insn (gen_sse2_loadhpd (op0, op0, m));
11662 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
11664 op0 = gen_lowpart (V4SFmode, op0);
11665 op1 = gen_lowpart (V4SFmode, op1);
11666 emit_insn (gen_sse_movups (op0, op1));
11670 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
11671 emit_move_insn (op0, CONST0_RTX (mode));
11673 emit_clobber (op0);
11675 if (mode != V4SFmode)
11676 op0 = gen_lowpart (V4SFmode, op0);
11677 m = adjust_address (op1, V2SFmode, 0);
11678 emit_insn (gen_sse_loadlps (op0, op0, m));
11679 m = adjust_address (op1, V2SFmode, 8);
11680 emit_insn (gen_sse_loadhps (op0, op0, m));
11683 else if (MEM_P (op0))
11685 /* If we're optimizing for size, movups is the smallest. */
11688 op0 = gen_lowpart (V4SFmode, op0);
11689 op1 = gen_lowpart (V4SFmode, op1);
11690 emit_insn (gen_sse_movups (op0, op1));
11694 /* ??? Similar to above, only less clear because of quote
11695 typeless stores unquote. */
11696 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
11697 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11699 op0 = gen_lowpart (V16QImode, op0);
11700 op1 = gen_lowpart (V16QImode, op1);
11701 emit_insn (gen_sse2_movdqu (op0, op1));
11705 if (TARGET_SSE2 && mode == V2DFmode)
11707 m = adjust_address (op0, DFmode, 0);
11708 emit_insn (gen_sse2_storelpd (m, op1));
11709 m = adjust_address (op0, DFmode, 8);
11710 emit_insn (gen_sse2_storehpd (m, op1));
11714 if (mode != V4SFmode)
11715 op1 = gen_lowpart (V4SFmode, op1);
11716 m = adjust_address (op0, V2SFmode, 0);
11717 emit_insn (gen_sse_storelps (m, op1));
11718 m = adjust_address (op0, V2SFmode, 8);
11719 emit_insn (gen_sse_storehps (m, op1));
11723 gcc_unreachable ();
11726 /* Expand a push in MODE. This is some mode for which we do not support
11727 proper push instructions, at least from the registers that we expect
11728 the value to live in. */
11731 ix86_expand_push (enum machine_mode mode, rtx x)
11735 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
11736 GEN_INT (-GET_MODE_SIZE (mode)),
11737 stack_pointer_rtx, 1, OPTAB_DIRECT);
11738 if (tmp != stack_pointer_rtx)
11739 emit_move_insn (stack_pointer_rtx, tmp);
11741 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
11742 emit_move_insn (tmp, x);
11745 /* Helper function of ix86_fixup_binary_operands to canonicalize
11746 operand order. Returns true if the operands should be swapped. */
11749 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
11752 rtx dst = operands[0];
11753 rtx src1 = operands[1];
11754 rtx src2 = operands[2];
11756 /* If the operation is not commutative, we can't do anything. */
11757 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
11760 /* Highest priority is that src1 should match dst. */
11761 if (rtx_equal_p (dst, src1))
11763 if (rtx_equal_p (dst, src2))
11766 /* Next highest priority is that immediate constants come second. */
11767 if (immediate_operand (src2, mode))
11769 if (immediate_operand (src1, mode))
11772 /* Lowest priority is that memory references should come second. */
11782 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
11783 destination to use for the operation. If different from the true
11784 destination in operands[0], a copy operation will be required. */
11787 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
11790 rtx dst = operands[0];
11791 rtx src1 = operands[1];
11792 rtx src2 = operands[2];
11794 /* Canonicalize operand order. */
11795 if (ix86_swap_binary_operands_p (code, mode, operands))
11799 /* It is invalid to swap operands of different modes. */
11800 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
11807 /* Both source operands cannot be in memory. */
11808 if (MEM_P (src1) && MEM_P (src2))
11810 /* Optimization: Only read from memory once. */
11811 if (rtx_equal_p (src1, src2))
11813 src2 = force_reg (mode, src2);
11817 src2 = force_reg (mode, src2);
11820 /* If the destination is memory, and we do not have matching source
11821 operands, do things in registers. */
11822 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
11823 dst = gen_reg_rtx (mode);
11825 /* Source 1 cannot be a constant. */
11826 if (CONSTANT_P (src1))
11827 src1 = force_reg (mode, src1);
11829 /* Source 1 cannot be a non-matching memory. */
11830 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
11831 src1 = force_reg (mode, src1);
11833 operands[1] = src1;
11834 operands[2] = src2;
11838 /* Similarly, but assume that the destination has already been
11839 set up properly. */
11842 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
11843 enum machine_mode mode, rtx operands[])
11845 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
11846 gcc_assert (dst == operands[0]);
11849 /* Attempt to expand a binary operator. Make the expansion closer to the
11850 actual machine, then just general_operand, which will allow 3 separate
11851 memory references (one output, two input) in a single insn. */
11854 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
11857 rtx src1, src2, dst, op, clob;
11859 dst = ix86_fixup_binary_operands (code, mode, operands);
11860 src1 = operands[1];
11861 src2 = operands[2];
11863 /* Emit the instruction. */
11865 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
11866 if (reload_in_progress)
11868 /* Reload doesn't know about the flags register, and doesn't know that
11869 it doesn't want to clobber it. We can only do this with PLUS. */
11870 gcc_assert (code == PLUS);
11875 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11876 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11879 /* Fix up the destination if needed. */
11880 if (dst != operands[0])
11881 emit_move_insn (operands[0], dst);
11884 /* Return TRUE or FALSE depending on whether the binary operator meets the
11885 appropriate constraints. */
11888 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
11891 rtx dst = operands[0];
11892 rtx src1 = operands[1];
11893 rtx src2 = operands[2];
11895 /* Both source operands cannot be in memory. */
11896 if (MEM_P (src1) && MEM_P (src2))
11899 /* Canonicalize operand order for commutative operators. */
11900 if (ix86_swap_binary_operands_p (code, mode, operands))
11907 /* If the destination is memory, we must have a matching source operand. */
11908 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
11911 /* Source 1 cannot be a constant. */
11912 if (CONSTANT_P (src1))
11915 /* Source 1 cannot be a non-matching memory. */
11916 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
11922 /* Attempt to expand a unary operator. Make the expansion closer to the
11923 actual machine, then just general_operand, which will allow 2 separate
11924 memory references (one output, one input) in a single insn. */
11927 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
11930 int matching_memory;
11931 rtx src, dst, op, clob;
11936 /* If the destination is memory, and we do not have matching source
11937 operands, do things in registers. */
11938 matching_memory = 0;
11941 if (rtx_equal_p (dst, src))
11942 matching_memory = 1;
11944 dst = gen_reg_rtx (mode);
11947 /* When source operand is memory, destination must match. */
11948 if (MEM_P (src) && !matching_memory)
11949 src = force_reg (mode, src);
11951 /* Emit the instruction. */
11953 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
11954 if (reload_in_progress || code == NOT)
11956 /* Reload doesn't know about the flags register, and doesn't know that
11957 it doesn't want to clobber it. */
11958 gcc_assert (code == NOT);
11963 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11964 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11967 /* Fix up the destination if needed. */
11968 if (dst != operands[0])
11969 emit_move_insn (operands[0], dst);
11972 /* Return TRUE or FALSE depending on whether the unary operator meets the
11973 appropriate constraints. */
11976 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
11977 enum machine_mode mode ATTRIBUTE_UNUSED,
11978 rtx operands[2] ATTRIBUTE_UNUSED)
11980 /* If one of operands is memory, source and destination must match. */
11981 if ((MEM_P (operands[0])
11982 || MEM_P (operands[1]))
11983 && ! rtx_equal_p (operands[0], operands[1]))
11988 /* Post-reload splitter for converting an SF or DFmode value in an
11989 SSE register into an unsigned SImode. */
11992 ix86_split_convert_uns_si_sse (rtx operands[])
11994 enum machine_mode vecmode;
11995 rtx value, large, zero_or_two31, input, two31, x;
11997 large = operands[1];
11998 zero_or_two31 = operands[2];
11999 input = operands[3];
12000 two31 = operands[4];
12001 vecmode = GET_MODE (large);
12002 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12004 /* Load up the value into the low element. We must ensure that the other
12005 elements are valid floats -- zero is the easiest such value. */
12008 if (vecmode == V4SFmode)
12009 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12011 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
12015 input = gen_rtx_REG (vecmode, REGNO (input));
12016 emit_move_insn (value, CONST0_RTX (vecmode));
12017 if (vecmode == V4SFmode)
12018 emit_insn (gen_sse_movss (value, value, input));
12020 emit_insn (gen_sse2_movsd (value, value, input));
12023 emit_move_insn (large, two31);
12024 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
12026 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12027 emit_insn (gen_rtx_SET (VOIDmode, large, x));
12029 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12030 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12032 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12033 emit_insn (gen_rtx_SET (VOIDmode, value, x));
12035 large = gen_rtx_REG (V4SImode, REGNO (large));
12036 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12038 x = gen_rtx_REG (V4SImode, REGNO (value));
12039 if (vecmode == V4SFmode)
12040 emit_insn (gen_sse2_cvttps2dq (x, value));
12042 emit_insn (gen_sse2_cvttpd2dq (x, value));
12045 emit_insn (gen_xorv4si3 (value, value, large));
12048 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12049 Expects the 64-bit DImode to be supplied in a pair of integral
12050 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12051 -mfpmath=sse, !optimize_size only. */
12054 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12056 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12057 rtx int_xmm, fp_xmm;
12058 rtx biases, exponents;
12061 int_xmm = gen_reg_rtx (V4SImode);
12062 if (TARGET_INTER_UNIT_MOVES)
12063 emit_insn (gen_movdi_to_sse (int_xmm, input));
12064 else if (TARGET_SSE_SPLIT_REGS)
12066 emit_clobber (int_xmm);
12067 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12071 x = gen_reg_rtx (V2DImode);
12072 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12073 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
12076 x = gen_rtx_CONST_VECTOR (V4SImode,
12077 gen_rtvec (4, GEN_INT (0x43300000UL),
12078 GEN_INT (0x45300000UL),
12079 const0_rtx, const0_rtx));
12080 exponents = validize_mem (force_const_mem (V4SImode, x));
12082 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12083 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12085 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12086 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12087 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12088 (0x1.0p84 + double(fp_value_hi_xmm)).
12089 Note these exponents differ by 32. */
12091 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12093 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12094 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12095 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12096 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12097 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12098 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12099 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12100 biases = validize_mem (force_const_mem (V2DFmode, biases));
12101 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12103 /* Add the upper and lower DFmode values together. */
12105 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
12108 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12109 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12110 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
12113 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12116 /* Not used, but eases macroization of patterns. */
12118 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12119 rtx input ATTRIBUTE_UNUSED)
12121 gcc_unreachable ();
12124 /* Convert an unsigned SImode value into a DFmode. Only currently used
12125 for SSE, but applicable anywhere. */
12128 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12130 REAL_VALUE_TYPE TWO31r;
12133 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12134 NULL, 1, OPTAB_DIRECT);
12136 fp = gen_reg_rtx (DFmode);
12137 emit_insn (gen_floatsidf2 (fp, x));
12139 real_ldexp (&TWO31r, &dconst1, 31);
12140 x = const_double_from_real_value (TWO31r, DFmode);
12142 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
12144 emit_move_insn (target, x);
12147 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12148 32-bit mode; otherwise we have a direct convert instruction. */
12151 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12153 REAL_VALUE_TYPE TWO32r;
12154 rtx fp_lo, fp_hi, x;
12156 fp_lo = gen_reg_rtx (DFmode);
12157 fp_hi = gen_reg_rtx (DFmode);
12159 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
12161 real_ldexp (&TWO32r, &dconst1, 32);
12162 x = const_double_from_real_value (TWO32r, DFmode);
12163 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
12165 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12167 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12170 emit_move_insn (target, x);
12173 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12174 For x86_32, -mfpmath=sse, !optimize_size only. */
12176 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12178 REAL_VALUE_TYPE ONE16r;
12179 rtx fp_hi, fp_lo, int_hi, int_lo, x;
12181 real_ldexp (&ONE16r, &dconst1, 16);
12182 x = const_double_from_real_value (ONE16r, SFmode);
12183 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12184 NULL, 0, OPTAB_DIRECT);
12185 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12186 NULL, 0, OPTAB_DIRECT);
12187 fp_hi = gen_reg_rtx (SFmode);
12188 fp_lo = gen_reg_rtx (SFmode);
12189 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12190 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
12191 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12193 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12195 if (!rtx_equal_p (target, fp_hi))
12196 emit_move_insn (target, fp_hi);
12199 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
12200 then replicate the value for all elements of the vector
12204 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
12211 v = gen_rtvec (4, value, value, value, value);
12212 return gen_rtx_CONST_VECTOR (V4SImode, v);
12216 v = gen_rtvec (2, value, value);
12217 return gen_rtx_CONST_VECTOR (V2DImode, v);
12221 v = gen_rtvec (4, value, value, value, value);
12223 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
12224 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12225 return gen_rtx_CONST_VECTOR (V4SFmode, v);
12229 v = gen_rtvec (2, value, value);
12231 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
12232 return gen_rtx_CONST_VECTOR (V2DFmode, v);
12235 gcc_unreachable ();
12239 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
12240 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
12241 for an SSE register. If VECT is true, then replicate the mask for
12242 all elements of the vector register. If INVERT is true, then create
12243 a mask excluding the sign bit. */
12246 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
12248 enum machine_mode vec_mode, imode;
12249 HOST_WIDE_INT hi, lo;
12254 /* Find the sign bit, sign extended to 2*HWI. */
12260 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
12261 lo = 0x80000000, hi = lo < 0;
12267 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
12268 if (HOST_BITS_PER_WIDE_INT >= 64)
12269 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
12271 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12276 vec_mode = VOIDmode;
12277 if (HOST_BITS_PER_WIDE_INT >= 64)
12280 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
12287 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12291 lo = ~lo, hi = ~hi;
12297 mask = immed_double_const (lo, hi, imode);
12299 vec = gen_rtvec (2, v, mask);
12300 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
12301 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
12308 gcc_unreachable ();
12312 lo = ~lo, hi = ~hi;
12314 /* Force this value into the low part of a fp vector constant. */
12315 mask = immed_double_const (lo, hi, imode);
12316 mask = gen_lowpart (mode, mask);
12318 if (vec_mode == VOIDmode)
12319 return force_reg (mode, mask);
12321 v = ix86_build_const_vector (mode, vect, mask);
12322 return force_reg (vec_mode, v);
12325 /* Generate code for floating point ABS or NEG. */
12328 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
12331 rtx mask, set, use, clob, dst, src;
12332 bool use_sse = false;
12333 bool vector_mode = VECTOR_MODE_P (mode);
12334 enum machine_mode elt_mode = mode;
12338 elt_mode = GET_MODE_INNER (mode);
12341 else if (mode == TFmode)
12343 else if (TARGET_SSE_MATH)
12344 use_sse = SSE_FLOAT_MODE_P (mode);
12346 /* NEG and ABS performed with SSE use bitwise mask operations.
12347 Create the appropriate mask now. */
12349 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
12358 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
12359 set = gen_rtx_SET (VOIDmode, dst, set);
12364 set = gen_rtx_fmt_e (code, mode, src);
12365 set = gen_rtx_SET (VOIDmode, dst, set);
12368 use = gen_rtx_USE (VOIDmode, mask);
12369 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12370 emit_insn (gen_rtx_PARALLEL (VOIDmode,
12371 gen_rtvec (3, set, use, clob)));
12378 /* Expand a copysign operation. Special case operand 0 being a constant. */
12381 ix86_expand_copysign (rtx operands[])
12383 enum machine_mode mode;
12384 rtx dest, op0, op1, mask, nmask;
12386 dest = operands[0];
12390 mode = GET_MODE (dest);
12392 if (GET_CODE (op0) == CONST_DOUBLE)
12394 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
12396 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
12397 op0 = simplify_unary_operation (ABS, mode, op0, mode);
12399 if (mode == SFmode || mode == DFmode)
12401 enum machine_mode vmode;
12403 vmode = mode == SFmode ? V4SFmode : V2DFmode;
12405 if (op0 == CONST0_RTX (mode))
12406 op0 = CONST0_RTX (vmode);
12411 if (mode == SFmode)
12412 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
12413 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12415 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
12417 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
12420 else if (op0 != CONST0_RTX (mode))
12421 op0 = force_reg (mode, op0);
12423 mask = ix86_build_signbit_mask (mode, 0, 0);
12425 if (mode == SFmode)
12426 copysign_insn = gen_copysignsf3_const;
12427 else if (mode == DFmode)
12428 copysign_insn = gen_copysigndf3_const;
12430 copysign_insn = gen_copysigntf3_const;
12432 emit_insn (copysign_insn (dest, op0, op1, mask));
12436 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
12438 nmask = ix86_build_signbit_mask (mode, 0, 1);
12439 mask = ix86_build_signbit_mask (mode, 0, 0);
12441 if (mode == SFmode)
12442 copysign_insn = gen_copysignsf3_var;
12443 else if (mode == DFmode)
12444 copysign_insn = gen_copysigndf3_var;
12446 copysign_insn = gen_copysigntf3_var;
12448 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
12452 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
12453 be a constant, and so has already been expanded into a vector constant. */
12456 ix86_split_copysign_const (rtx operands[])
12458 enum machine_mode mode, vmode;
12459 rtx dest, op0, op1, mask, x;
12461 dest = operands[0];
12464 mask = operands[3];
12466 mode = GET_MODE (dest);
12467 vmode = GET_MODE (mask);
12469 dest = simplify_gen_subreg (vmode, dest, mode, 0);
12470 x = gen_rtx_AND (vmode, dest, mask);
12471 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12473 if (op0 != CONST0_RTX (vmode))
12475 x = gen_rtx_IOR (vmode, dest, op0);
12476 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12480 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
12481 so we have to do two masks. */
12484 ix86_split_copysign_var (rtx operands[])
12486 enum machine_mode mode, vmode;
12487 rtx dest, scratch, op0, op1, mask, nmask, x;
12489 dest = operands[0];
12490 scratch = operands[1];
12493 nmask = operands[4];
12494 mask = operands[5];
12496 mode = GET_MODE (dest);
12497 vmode = GET_MODE (mask);
12499 if (rtx_equal_p (op0, op1))
12501 /* Shouldn't happen often (it's useless, obviously), but when it does
12502 we'd generate incorrect code if we continue below. */
12503 emit_move_insn (dest, op0);
12507 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
12509 gcc_assert (REGNO (op1) == REGNO (scratch));
12511 x = gen_rtx_AND (vmode, scratch, mask);
12512 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
12515 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
12516 x = gen_rtx_NOT (vmode, dest);
12517 x = gen_rtx_AND (vmode, x, op0);
12518 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12522 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
12524 x = gen_rtx_AND (vmode, scratch, mask);
12526 else /* alternative 2,4 */
12528 gcc_assert (REGNO (mask) == REGNO (scratch));
12529 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
12530 x = gen_rtx_AND (vmode, scratch, op1);
12532 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
12534 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
12536 dest = simplify_gen_subreg (vmode, op0, mode, 0);
12537 x = gen_rtx_AND (vmode, dest, nmask);
12539 else /* alternative 3,4 */
12541 gcc_assert (REGNO (nmask) == REGNO (dest));
12543 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
12544 x = gen_rtx_AND (vmode, dest, op0);
12546 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12549 x = gen_rtx_IOR (vmode, dest, scratch);
12550 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12553 /* Return TRUE or FALSE depending on whether the first SET in INSN
12554 has source and destination with matching CC modes, and that the
12555 CC mode is at least as constrained as REQ_MODE. */
12558 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
12561 enum machine_mode set_mode;
12563 set = PATTERN (insn);
12564 if (GET_CODE (set) == PARALLEL)
12565 set = XVECEXP (set, 0, 0);
12566 gcc_assert (GET_CODE (set) == SET);
12567 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
12569 set_mode = GET_MODE (SET_DEST (set));
12573 if (req_mode != CCNOmode
12574 && (req_mode != CCmode
12575 || XEXP (SET_SRC (set), 1) != const0_rtx))
12579 if (req_mode == CCGCmode)
12583 if (req_mode == CCGOCmode || req_mode == CCNOmode)
12587 if (req_mode == CCZmode)
12594 gcc_unreachable ();
12597 return (GET_MODE (SET_SRC (set)) == set_mode);
12600 /* Generate insn patterns to do an integer compare of OPERANDS. */
12603 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
12605 enum machine_mode cmpmode;
12608 cmpmode = SELECT_CC_MODE (code, op0, op1);
12609 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
12611 /* This is very simple, but making the interface the same as in the
12612 FP case makes the rest of the code easier. */
12613 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
12614 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
12616 /* Return the test that should be put into the flags user, i.e.
12617 the bcc, scc, or cmov instruction. */
12618 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
12621 /* Figure out whether to use ordered or unordered fp comparisons.
12622 Return the appropriate mode to use. */
12625 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
12627 /* ??? In order to make all comparisons reversible, we do all comparisons
12628 non-trapping when compiling for IEEE. Once gcc is able to distinguish
12629 all forms trapping and nontrapping comparisons, we can make inequality
12630 comparisons trapping again, since it results in better code when using
12631 FCOM based compares. */
12632 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
12636 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
12638 enum machine_mode mode = GET_MODE (op0);
12640 if (SCALAR_FLOAT_MODE_P (mode))
12642 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12643 return ix86_fp_compare_mode (code);
12648 /* Only zero flag is needed. */
12649 case EQ: /* ZF=0 */
12650 case NE: /* ZF!=0 */
12652 /* Codes needing carry flag. */
12653 case GEU: /* CF=0 */
12654 case LTU: /* CF=1 */
12655 /* Detect overflow checks. They need just the carry flag. */
12656 if (GET_CODE (op0) == PLUS
12657 && rtx_equal_p (op1, XEXP (op0, 0)))
12661 case GTU: /* CF=0 & ZF=0 */
12662 case LEU: /* CF=1 | ZF=1 */
12663 /* Detect overflow checks. They need just the carry flag. */
12664 if (GET_CODE (op0) == MINUS
12665 && rtx_equal_p (op1, XEXP (op0, 0)))
12669 /* Codes possibly doable only with sign flag when
12670 comparing against zero. */
12671 case GE: /* SF=OF or SF=0 */
12672 case LT: /* SF<>OF or SF=1 */
12673 if (op1 == const0_rtx)
12676 /* For other cases Carry flag is not required. */
12678 /* Codes doable only with sign flag when comparing
12679 against zero, but we miss jump instruction for it
12680 so we need to use relational tests against overflow
12681 that thus needs to be zero. */
12682 case GT: /* ZF=0 & SF=OF */
12683 case LE: /* ZF=1 | SF<>OF */
12684 if (op1 == const0_rtx)
12688 /* strcmp pattern do (use flags) and combine may ask us for proper
12693 gcc_unreachable ();
12697 /* Return the fixed registers used for condition codes. */
12700 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12707 /* If two condition code modes are compatible, return a condition code
12708 mode which is compatible with both. Otherwise, return
12711 static enum machine_mode
12712 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
12717 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
12720 if ((m1 == CCGCmode && m2 == CCGOCmode)
12721 || (m1 == CCGOCmode && m2 == CCGCmode))
12727 gcc_unreachable ();
12757 /* These are only compatible with themselves, which we already
12763 /* Split comparison code CODE into comparisons we can do using branch
12764 instructions. BYPASS_CODE is comparison code for branch that will
12765 branch around FIRST_CODE and SECOND_CODE. If some of branches
12766 is not required, set value to UNKNOWN.
12767 We never require more than two branches. */
12770 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
12771 enum rtx_code *first_code,
12772 enum rtx_code *second_code)
12774 *first_code = code;
12775 *bypass_code = UNKNOWN;
12776 *second_code = UNKNOWN;
12778 /* The fcomi comparison sets flags as follows:
12788 case GT: /* GTU - CF=0 & ZF=0 */
12789 case GE: /* GEU - CF=0 */
12790 case ORDERED: /* PF=0 */
12791 case UNORDERED: /* PF=1 */
12792 case UNEQ: /* EQ - ZF=1 */
12793 case UNLT: /* LTU - CF=1 */
12794 case UNLE: /* LEU - CF=1 | ZF=1 */
12795 case LTGT: /* EQ - ZF=0 */
12797 case LT: /* LTU - CF=1 - fails on unordered */
12798 *first_code = UNLT;
12799 *bypass_code = UNORDERED;
12801 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
12802 *first_code = UNLE;
12803 *bypass_code = UNORDERED;
12805 case EQ: /* EQ - ZF=1 - fails on unordered */
12806 *first_code = UNEQ;
12807 *bypass_code = UNORDERED;
12809 case NE: /* NE - ZF=0 - fails on unordered */
12810 *first_code = LTGT;
12811 *second_code = UNORDERED;
12813 case UNGE: /* GEU - CF=0 - fails on unordered */
12815 *second_code = UNORDERED;
12817 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
12819 *second_code = UNORDERED;
12822 gcc_unreachable ();
12824 if (!TARGET_IEEE_FP)
12826 *second_code = UNKNOWN;
12827 *bypass_code = UNKNOWN;
12831 /* Return cost of comparison done fcom + arithmetics operations on AX.
12832 All following functions do use number of instructions as a cost metrics.
12833 In future this should be tweaked to compute bytes for optimize_size and
12834 take into account performance of various instructions on various CPUs. */
12836 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
12838 if (!TARGET_IEEE_FP)
12840 /* The cost of code output by ix86_expand_fp_compare. */
12864 gcc_unreachable ();
12868 /* Return cost of comparison done using fcomi operation.
12869 See ix86_fp_comparison_arithmetics_cost for the metrics. */
12871 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
12873 enum rtx_code bypass_code, first_code, second_code;
12874 /* Return arbitrarily high cost when instruction is not supported - this
12875 prevents gcc from using it. */
12878 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12879 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
12882 /* Return cost of comparison done using sahf operation.
12883 See ix86_fp_comparison_arithmetics_cost for the metrics. */
12885 ix86_fp_comparison_sahf_cost (enum rtx_code code)
12887 enum rtx_code bypass_code, first_code, second_code;
12888 /* Return arbitrarily high cost when instruction is not preferred - this
12889 avoids gcc from using it. */
12890 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
12892 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12893 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
12896 /* Compute cost of the comparison done using any method.
12897 See ix86_fp_comparison_arithmetics_cost for the metrics. */
12899 ix86_fp_comparison_cost (enum rtx_code code)
12901 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
12904 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
12905 sahf_cost = ix86_fp_comparison_sahf_cost (code);
12907 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
12908 if (min > sahf_cost)
12910 if (min > fcomi_cost)
12915 /* Return true if we should use an FCOMI instruction for this
12919 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
12921 enum rtx_code swapped_code = swap_condition (code);
12923 return ((ix86_fp_comparison_cost (code)
12924 == ix86_fp_comparison_fcomi_cost (code))
12925 || (ix86_fp_comparison_cost (swapped_code)
12926 == ix86_fp_comparison_fcomi_cost (swapped_code)));
12929 /* Swap, force into registers, or otherwise massage the two operands
12930 to a fp comparison. The operands are updated in place; the new
12931 comparison code is returned. */
12933 static enum rtx_code
12934 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
12936 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
12937 rtx op0 = *pop0, op1 = *pop1;
12938 enum machine_mode op_mode = GET_MODE (op0);
12939 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
12941 /* All of the unordered compare instructions only work on registers.
12942 The same is true of the fcomi compare instructions. The XFmode
12943 compare instructions require registers except when comparing
12944 against zero or when converting operand 1 from fixed point to
12948 && (fpcmp_mode == CCFPUmode
12949 || (op_mode == XFmode
12950 && ! (standard_80387_constant_p (op0) == 1
12951 || standard_80387_constant_p (op1) == 1)
12952 && GET_CODE (op1) != FLOAT)
12953 || ix86_use_fcomi_compare (code)))
12955 op0 = force_reg (op_mode, op0);
12956 op1 = force_reg (op_mode, op1);
12960 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
12961 things around if they appear profitable, otherwise force op0
12962 into a register. */
12964 if (standard_80387_constant_p (op0) == 0
12966 && ! (standard_80387_constant_p (op1) == 0
12970 tmp = op0, op0 = op1, op1 = tmp;
12971 code = swap_condition (code);
12975 op0 = force_reg (op_mode, op0);
12977 if (CONSTANT_P (op1))
12979 int tmp = standard_80387_constant_p (op1);
12981 op1 = validize_mem (force_const_mem (op_mode, op1));
12985 op1 = force_reg (op_mode, op1);
12988 op1 = force_reg (op_mode, op1);
12992 /* Try to rearrange the comparison to make it cheaper. */
12993 if (ix86_fp_comparison_cost (code)
12994 > ix86_fp_comparison_cost (swap_condition (code))
12995 && (REG_P (op1) || can_create_pseudo_p ()))
12998 tmp = op0, op0 = op1, op1 = tmp;
12999 code = swap_condition (code);
13001 op0 = force_reg (op_mode, op0);
13009 /* Convert comparison codes we use to represent FP comparison to integer
13010 code that will result in proper branch. Return UNKNOWN if no such code
13014 ix86_fp_compare_code_to_integer (enum rtx_code code)
13043 /* Generate insn patterns to do a floating point compare of OPERANDS. */
13046 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13047 rtx *second_test, rtx *bypass_test)
13049 enum machine_mode fpcmp_mode, intcmp_mode;
13051 int cost = ix86_fp_comparison_cost (code);
13052 enum rtx_code bypass_code, first_code, second_code;
13054 fpcmp_mode = ix86_fp_compare_mode (code);
13055 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13058 *second_test = NULL_RTX;
13060 *bypass_test = NULL_RTX;
13062 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13064 /* Do fcomi/sahf based test when profitable. */
13065 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13066 && (bypass_code == UNKNOWN || bypass_test)
13067 && (second_code == UNKNOWN || second_test))
13069 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13070 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
13076 gcc_assert (TARGET_SAHF);
13079 scratch = gen_reg_rtx (HImode);
13080 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13082 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13085 /* The FP codes work out to act like unsigned. */
13086 intcmp_mode = fpcmp_mode;
13088 if (bypass_code != UNKNOWN)
13089 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13090 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13092 if (second_code != UNKNOWN)
13093 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13094 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13099 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13100 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13101 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13103 scratch = gen_reg_rtx (HImode);
13104 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13106 /* In the unordered case, we have to check C2 for NaN's, which
13107 doesn't happen to work out to anything nice combination-wise.
13108 So do some bit twiddling on the value we've got in AH to come
13109 up with an appropriate set of condition codes. */
13111 intcmp_mode = CCNOmode;
13116 if (code == GT || !TARGET_IEEE_FP)
13118 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13123 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13124 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13125 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13126 intcmp_mode = CCmode;
13132 if (code == LT && TARGET_IEEE_FP)
13134 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13135 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13136 intcmp_mode = CCmode;
13141 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13147 if (code == GE || !TARGET_IEEE_FP)
13149 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13154 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13155 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13162 if (code == LE && TARGET_IEEE_FP)
13164 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13165 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13166 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13167 intcmp_mode = CCmode;
13172 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13178 if (code == EQ && TARGET_IEEE_FP)
13180 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13181 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13182 intcmp_mode = CCmode;
13187 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13194 if (code == NE && TARGET_IEEE_FP)
13196 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13197 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13203 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13209 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13213 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13218 gcc_unreachable ();
13222 /* Return the test that should be put into the flags user, i.e.
13223 the bcc, scc, or cmov instruction. */
13224 return gen_rtx_fmt_ee (code, VOIDmode,
13225 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13230 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
13233 op0 = ix86_compare_op0;
13234 op1 = ix86_compare_op1;
13237 *second_test = NULL_RTX;
13239 *bypass_test = NULL_RTX;
13241 if (ix86_compare_emitted)
13243 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
13244 ix86_compare_emitted = NULL_RTX;
13246 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
13248 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
13249 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13250 second_test, bypass_test);
13253 ret = ix86_expand_int_compare (code, op0, op1);
13258 /* Return true if the CODE will result in nontrivial jump sequence. */
13260 ix86_fp_jump_nontrivial_p (enum rtx_code code)
13262 enum rtx_code bypass_code, first_code, second_code;
13265 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13266 return bypass_code != UNKNOWN || second_code != UNKNOWN;
13270 ix86_expand_branch (enum rtx_code code, rtx label)
13274 /* If we have emitted a compare insn, go straight to simple.
13275 ix86_expand_compare won't emit anything if ix86_compare_emitted
13277 if (ix86_compare_emitted)
13280 switch (GET_MODE (ix86_compare_op0))
13286 tmp = ix86_expand_compare (code, NULL, NULL);
13287 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13288 gen_rtx_LABEL_REF (VOIDmode, label),
13290 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13299 enum rtx_code bypass_code, first_code, second_code;
13301 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
13302 &ix86_compare_op1);
13304 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13306 /* Check whether we will use the natural sequence with one jump. If
13307 so, we can expand jump early. Otherwise delay expansion by
13308 creating compound insn to not confuse optimizers. */
13309 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
13311 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
13312 gen_rtx_LABEL_REF (VOIDmode, label),
13313 pc_rtx, NULL_RTX, NULL_RTX);
13317 tmp = gen_rtx_fmt_ee (code, VOIDmode,
13318 ix86_compare_op0, ix86_compare_op1);
13319 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13320 gen_rtx_LABEL_REF (VOIDmode, label),
13322 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
13324 use_fcomi = ix86_use_fcomi_compare (code);
13325 vec = rtvec_alloc (3 + !use_fcomi);
13326 RTVEC_ELT (vec, 0) = tmp;
13328 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
13330 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
13333 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
13335 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
13344 /* Expand DImode branch into multiple compare+branch. */
13346 rtx lo[2], hi[2], label2;
13347 enum rtx_code code1, code2, code3;
13348 enum machine_mode submode;
13350 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
13352 tmp = ix86_compare_op0;
13353 ix86_compare_op0 = ix86_compare_op1;
13354 ix86_compare_op1 = tmp;
13355 code = swap_condition (code);
13357 if (GET_MODE (ix86_compare_op0) == DImode)
13359 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
13360 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
13365 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
13366 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
13370 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
13371 avoid two branches. This costs one extra insn, so disable when
13372 optimizing for size. */
13374 if ((code == EQ || code == NE)
13376 || hi[1] == const0_rtx || lo[1] == const0_rtx))
13381 if (hi[1] != const0_rtx)
13382 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
13383 NULL_RTX, 0, OPTAB_WIDEN);
13386 if (lo[1] != const0_rtx)
13387 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
13388 NULL_RTX, 0, OPTAB_WIDEN);
13390 tmp = expand_binop (submode, ior_optab, xor1, xor0,
13391 NULL_RTX, 0, OPTAB_WIDEN);
13393 ix86_compare_op0 = tmp;
13394 ix86_compare_op1 = const0_rtx;
13395 ix86_expand_branch (code, label);
13399 /* Otherwise, if we are doing less-than or greater-or-equal-than,
13400 op1 is a constant and the low word is zero, then we can just
13401 examine the high word. Similarly for low word -1 and
13402 less-or-equal-than or greater-than. */
13404 if (CONST_INT_P (hi[1]))
13407 case LT: case LTU: case GE: case GEU:
13408 if (lo[1] == const0_rtx)
13410 ix86_compare_op0 = hi[0];
13411 ix86_compare_op1 = hi[1];
13412 ix86_expand_branch (code, label);
13416 case LE: case LEU: case GT: case GTU:
13417 if (lo[1] == constm1_rtx)
13419 ix86_compare_op0 = hi[0];
13420 ix86_compare_op1 = hi[1];
13421 ix86_expand_branch (code, label);
13429 /* Otherwise, we need two or three jumps. */
13431 label2 = gen_label_rtx ();
13434 code2 = swap_condition (code);
13435 code3 = unsigned_condition (code);
13439 case LT: case GT: case LTU: case GTU:
13442 case LE: code1 = LT; code2 = GT; break;
13443 case GE: code1 = GT; code2 = LT; break;
13444 case LEU: code1 = LTU; code2 = GTU; break;
13445 case GEU: code1 = GTU; code2 = LTU; break;
13447 case EQ: code1 = UNKNOWN; code2 = NE; break;
13448 case NE: code2 = UNKNOWN; break;
13451 gcc_unreachable ();
13456 * if (hi(a) < hi(b)) goto true;
13457 * if (hi(a) > hi(b)) goto false;
13458 * if (lo(a) < lo(b)) goto true;
13462 ix86_compare_op0 = hi[0];
13463 ix86_compare_op1 = hi[1];
13465 if (code1 != UNKNOWN)
13466 ix86_expand_branch (code1, label);
13467 if (code2 != UNKNOWN)
13468 ix86_expand_branch (code2, label2);
13470 ix86_compare_op0 = lo[0];
13471 ix86_compare_op1 = lo[1];
13472 ix86_expand_branch (code3, label);
13474 if (code2 != UNKNOWN)
13475 emit_label (label2);
13480 gcc_unreachable ();
13484 /* Split branch based on floating point condition. */
13486 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
13487 rtx target1, rtx target2, rtx tmp, rtx pushed)
13489 rtx second, bypass;
13490 rtx label = NULL_RTX;
13492 int bypass_probability = -1, second_probability = -1, probability = -1;
13495 if (target2 != pc_rtx)
13498 code = reverse_condition_maybe_unordered (code);
13503 condition = ix86_expand_fp_compare (code, op1, op2,
13504 tmp, &second, &bypass);
13506 /* Remove pushed operand from stack. */
13508 ix86_free_from_memory (GET_MODE (pushed));
13510 if (split_branch_probability >= 0)
13512 /* Distribute the probabilities across the jumps.
13513 Assume the BYPASS and SECOND to be always test
13515 probability = split_branch_probability;
13517 /* Value of 1 is low enough to make no need for probability
13518 to be updated. Later we may run some experiments and see
13519 if unordered values are more frequent in practice. */
13521 bypass_probability = 1;
13523 second_probability = 1;
13525 if (bypass != NULL_RTX)
13527 label = gen_label_rtx ();
13528 i = emit_jump_insn (gen_rtx_SET
13530 gen_rtx_IF_THEN_ELSE (VOIDmode,
13532 gen_rtx_LABEL_REF (VOIDmode,
13535 if (bypass_probability >= 0)
13537 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13538 GEN_INT (bypass_probability),
13541 i = emit_jump_insn (gen_rtx_SET
13543 gen_rtx_IF_THEN_ELSE (VOIDmode,
13544 condition, target1, target2)));
13545 if (probability >= 0)
13547 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13548 GEN_INT (probability),
13550 if (second != NULL_RTX)
13552 i = emit_jump_insn (gen_rtx_SET
13554 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
13556 if (second_probability >= 0)
13558 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13559 GEN_INT (second_probability),
13562 if (label != NULL_RTX)
13563 emit_label (label);
13567 ix86_expand_setcc (enum rtx_code code, rtx dest)
13569 rtx ret, tmp, tmpreg, equiv;
13570 rtx second_test, bypass_test;
13572 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
13573 return 0; /* FAIL */
13575 gcc_assert (GET_MODE (dest) == QImode);
13577 ret = ix86_expand_compare (code, &second_test, &bypass_test);
13578 PUT_MODE (ret, QImode);
13583 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
13584 if (bypass_test || second_test)
13586 rtx test = second_test;
13588 rtx tmp2 = gen_reg_rtx (QImode);
13591 gcc_assert (!second_test);
13592 test = bypass_test;
13594 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
13596 PUT_MODE (test, QImode);
13597 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
13600 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
13602 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
13605 /* Attach a REG_EQUAL note describing the comparison result. */
13606 if (ix86_compare_op0 && ix86_compare_op1)
13608 equiv = simplify_gen_relational (code, QImode,
13609 GET_MODE (ix86_compare_op0),
13610 ix86_compare_op0, ix86_compare_op1);
13611 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
13614 return 1; /* DONE */
13617 /* Expand comparison setting or clearing carry flag. Return true when
13618 successful and set pop for the operation. */
13620 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
13622 enum machine_mode mode =
13623 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
13625 /* Do not handle DImode compares that go through special path. */
13626 if (mode == (TARGET_64BIT ? TImode : DImode))
13629 if (SCALAR_FLOAT_MODE_P (mode))
13631 rtx second_test = NULL, bypass_test = NULL;
13632 rtx compare_op, compare_seq;
13634 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13636 /* Shortcut: following common codes never translate
13637 into carry flag compares. */
13638 if (code == EQ || code == NE || code == UNEQ || code == LTGT
13639 || code == ORDERED || code == UNORDERED)
13642 /* These comparisons require zero flag; swap operands so they won't. */
13643 if ((code == GT || code == UNLE || code == LE || code == UNGT)
13644 && !TARGET_IEEE_FP)
13649 code = swap_condition (code);
13652 /* Try to expand the comparison and verify that we end up with
13653 carry flag based comparison. This fails to be true only when
13654 we decide to expand comparison using arithmetic that is not
13655 too common scenario. */
13657 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13658 &second_test, &bypass_test);
13659 compare_seq = get_insns ();
13662 if (second_test || bypass_test)
13665 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13666 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13667 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
13669 code = GET_CODE (compare_op);
13671 if (code != LTU && code != GEU)
13674 emit_insn (compare_seq);
13679 if (!INTEGRAL_MODE_P (mode))
13688 /* Convert a==0 into (unsigned)a<1. */
13691 if (op1 != const0_rtx)
13694 code = (code == EQ ? LTU : GEU);
13697 /* Convert a>b into b<a or a>=b-1. */
13700 if (CONST_INT_P (op1))
13702 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
13703 /* Bail out on overflow. We still can swap operands but that
13704 would force loading of the constant into register. */
13705 if (op1 == const0_rtx
13706 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
13708 code = (code == GTU ? GEU : LTU);
13715 code = (code == GTU ? LTU : GEU);
13719 /* Convert a>=0 into (unsigned)a<0x80000000. */
13722 if (mode == DImode || op1 != const0_rtx)
13724 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
13725 code = (code == LT ? GEU : LTU);
13729 if (mode == DImode || op1 != constm1_rtx)
13731 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
13732 code = (code == LE ? GEU : LTU);
13738 /* Swapping operands may cause constant to appear as first operand. */
13739 if (!nonimmediate_operand (op0, VOIDmode))
13741 if (!can_create_pseudo_p ())
13743 op0 = force_reg (mode, op0);
13745 ix86_compare_op0 = op0;
13746 ix86_compare_op1 = op1;
13747 *pop = ix86_expand_compare (code, NULL, NULL);
13748 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
13753 ix86_expand_int_movcc (rtx operands[])
13755 enum rtx_code code = GET_CODE (operands[1]), compare_code;
13756 rtx compare_seq, compare_op;
13757 rtx second_test, bypass_test;
13758 enum machine_mode mode = GET_MODE (operands[0]);
13759 bool sign_bit_compare_p = false;;
13762 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13763 compare_seq = get_insns ();
13766 compare_code = GET_CODE (compare_op);
13768 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
13769 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
13770 sign_bit_compare_p = true;
13772 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
13773 HImode insns, we'd be swallowed in word prefix ops. */
13775 if ((mode != HImode || TARGET_FAST_PREFIX)
13776 && (mode != (TARGET_64BIT ? TImode : DImode))
13777 && CONST_INT_P (operands[2])
13778 && CONST_INT_P (operands[3]))
13780 rtx out = operands[0];
13781 HOST_WIDE_INT ct = INTVAL (operands[2]);
13782 HOST_WIDE_INT cf = INTVAL (operands[3]);
13783 HOST_WIDE_INT diff;
13786 /* Sign bit compares are better done using shifts than we do by using
13788 if (sign_bit_compare_p
13789 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13790 ix86_compare_op1, &compare_op))
13792 /* Detect overlap between destination and compare sources. */
13795 if (!sign_bit_compare_p)
13797 bool fpcmp = false;
13799 compare_code = GET_CODE (compare_op);
13801 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13802 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13805 compare_code = ix86_fp_compare_code_to_integer (compare_code);
13808 /* To simplify rest of code, restrict to the GEU case. */
13809 if (compare_code == LTU)
13811 HOST_WIDE_INT tmp = ct;
13814 compare_code = reverse_condition (compare_code);
13815 code = reverse_condition (code);
13820 PUT_CODE (compare_op,
13821 reverse_condition_maybe_unordered
13822 (GET_CODE (compare_op)));
13824 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13828 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
13829 || reg_overlap_mentioned_p (out, ix86_compare_op1))
13830 tmp = gen_reg_rtx (mode);
13832 if (mode == DImode)
13833 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
13835 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
13839 if (code == GT || code == GE)
13840 code = reverse_condition (code);
13843 HOST_WIDE_INT tmp = ct;
13848 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
13849 ix86_compare_op1, VOIDmode, 0, -1);
13862 tmp = expand_simple_binop (mode, PLUS,
13864 copy_rtx (tmp), 1, OPTAB_DIRECT);
13875 tmp = expand_simple_binop (mode, IOR,
13877 copy_rtx (tmp), 1, OPTAB_DIRECT);
13879 else if (diff == -1 && ct)
13889 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
13891 tmp = expand_simple_binop (mode, PLUS,
13892 copy_rtx (tmp), GEN_INT (cf),
13893 copy_rtx (tmp), 1, OPTAB_DIRECT);
13901 * andl cf - ct, dest
13911 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
13914 tmp = expand_simple_binop (mode, AND,
13916 gen_int_mode (cf - ct, mode),
13917 copy_rtx (tmp), 1, OPTAB_DIRECT);
13919 tmp = expand_simple_binop (mode, PLUS,
13920 copy_rtx (tmp), GEN_INT (ct),
13921 copy_rtx (tmp), 1, OPTAB_DIRECT);
13924 if (!rtx_equal_p (tmp, out))
13925 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
13927 return 1; /* DONE */
13932 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13935 tmp = ct, ct = cf, cf = tmp;
13938 if (SCALAR_FLOAT_MODE_P (cmp_mode))
13940 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13942 /* We may be reversing unordered compare to normal compare, that
13943 is not valid in general (we may convert non-trapping condition
13944 to trapping one), however on i386 we currently emit all
13945 comparisons unordered. */
13946 compare_code = reverse_condition_maybe_unordered (compare_code);
13947 code = reverse_condition_maybe_unordered (code);
13951 compare_code = reverse_condition (compare_code);
13952 code = reverse_condition (code);
13956 compare_code = UNKNOWN;
13957 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
13958 && CONST_INT_P (ix86_compare_op1))
13960 if (ix86_compare_op1 == const0_rtx
13961 && (code == LT || code == GE))
13962 compare_code = code;
13963 else if (ix86_compare_op1 == constm1_rtx)
13967 else if (code == GT)
13972 /* Optimize dest = (op0 < 0) ? -1 : cf. */
13973 if (compare_code != UNKNOWN
13974 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
13975 && (cf == -1 || ct == -1))
13977 /* If lea code below could be used, only optimize
13978 if it results in a 2 insn sequence. */
13980 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
13981 || diff == 3 || diff == 5 || diff == 9)
13982 || (compare_code == LT && ct == -1)
13983 || (compare_code == GE && cf == -1))
13986 * notl op1 (if necessary)
13994 code = reverse_condition (code);
13997 out = emit_store_flag (out, code, ix86_compare_op0,
13998 ix86_compare_op1, VOIDmode, 0, -1);
14000 out = expand_simple_binop (mode, IOR,
14002 out, 1, OPTAB_DIRECT);
14003 if (out != operands[0])
14004 emit_move_insn (operands[0], out);
14006 return 1; /* DONE */
14011 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14012 || diff == 3 || diff == 5 || diff == 9)
14013 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14015 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14021 * lea cf(dest*(ct-cf)),dest
14025 * This also catches the degenerate setcc-only case.
14031 out = emit_store_flag (out, code, ix86_compare_op0,
14032 ix86_compare_op1, VOIDmode, 0, 1);
14035 /* On x86_64 the lea instruction operates on Pmode, so we need
14036 to get arithmetics done in proper mode to match. */
14038 tmp = copy_rtx (out);
14042 out1 = copy_rtx (out);
14043 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14047 tmp = gen_rtx_PLUS (mode, tmp, out1);
14053 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14056 if (!rtx_equal_p (tmp, out))
14059 out = force_operand (tmp, copy_rtx (out));
14061 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14063 if (!rtx_equal_p (out, operands[0]))
14064 emit_move_insn (operands[0], copy_rtx (out));
14066 return 1; /* DONE */
14070 * General case: Jumpful:
14071 * xorl dest,dest cmpl op1, op2
14072 * cmpl op1, op2 movl ct, dest
14073 * setcc dest jcc 1f
14074 * decl dest movl cf, dest
14075 * andl (cf-ct),dest 1:
14078 * Size 20. Size 14.
14080 * This is reasonably steep, but branch mispredict costs are
14081 * high on modern cpus, so consider failing only if optimizing
14085 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14086 && BRANCH_COST >= 2)
14090 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14095 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14097 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14099 /* We may be reversing unordered compare to normal compare,
14100 that is not valid in general (we may convert non-trapping
14101 condition to trapping one), however on i386 we currently
14102 emit all comparisons unordered. */
14103 code = reverse_condition_maybe_unordered (code);
14107 code = reverse_condition (code);
14108 if (compare_code != UNKNOWN)
14109 compare_code = reverse_condition (compare_code);
14113 if (compare_code != UNKNOWN)
14115 /* notl op1 (if needed)
14120 For x < 0 (resp. x <= -1) there will be no notl,
14121 so if possible swap the constants to get rid of the
14123 True/false will be -1/0 while code below (store flag
14124 followed by decrement) is 0/-1, so the constants need
14125 to be exchanged once more. */
14127 if (compare_code == GE || !cf)
14129 code = reverse_condition (code);
14134 HOST_WIDE_INT tmp = cf;
14139 out = emit_store_flag (out, code, ix86_compare_op0,
14140 ix86_compare_op1, VOIDmode, 0, -1);
14144 out = emit_store_flag (out, code, ix86_compare_op0,
14145 ix86_compare_op1, VOIDmode, 0, 1);
14147 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14148 copy_rtx (out), 1, OPTAB_DIRECT);
14151 out = expand_simple_binop (mode, AND, copy_rtx (out),
14152 gen_int_mode (cf - ct, mode),
14153 copy_rtx (out), 1, OPTAB_DIRECT);
14155 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14156 copy_rtx (out), 1, OPTAB_DIRECT);
14157 if (!rtx_equal_p (out, operands[0]))
14158 emit_move_insn (operands[0], copy_rtx (out));
14160 return 1; /* DONE */
14164 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14166 /* Try a few things more with specific constants and a variable. */
14169 rtx var, orig_out, out, tmp;
14171 if (BRANCH_COST <= 2)
14172 return 0; /* FAIL */
14174 /* If one of the two operands is an interesting constant, load a
14175 constant with the above and mask it in with a logical operation. */
14177 if (CONST_INT_P (operands[2]))
14180 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14181 operands[3] = constm1_rtx, op = and_optab;
14182 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14183 operands[3] = const0_rtx, op = ior_optab;
14185 return 0; /* FAIL */
14187 else if (CONST_INT_P (operands[3]))
14190 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14191 operands[2] = constm1_rtx, op = and_optab;
14192 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
14193 operands[2] = const0_rtx, op = ior_optab;
14195 return 0; /* FAIL */
14198 return 0; /* FAIL */
14200 orig_out = operands[0];
14201 tmp = gen_reg_rtx (mode);
14204 /* Recurse to get the constant loaded. */
14205 if (ix86_expand_int_movcc (operands) == 0)
14206 return 0; /* FAIL */
14208 /* Mask in the interesting variable. */
14209 out = expand_binop (mode, op, var, tmp, orig_out, 0,
14211 if (!rtx_equal_p (out, orig_out))
14212 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
14214 return 1; /* DONE */
14218 * For comparison with above,
14228 if (! nonimmediate_operand (operands[2], mode))
14229 operands[2] = force_reg (mode, operands[2]);
14230 if (! nonimmediate_operand (operands[3], mode))
14231 operands[3] = force_reg (mode, operands[3]);
14233 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14235 rtx tmp = gen_reg_rtx (mode);
14236 emit_move_insn (tmp, operands[3]);
14239 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14241 rtx tmp = gen_reg_rtx (mode);
14242 emit_move_insn (tmp, operands[2]);
14246 if (! register_operand (operands[2], VOIDmode)
14248 || ! register_operand (operands[3], VOIDmode)))
14249 operands[2] = force_reg (mode, operands[2]);
14252 && ! register_operand (operands[3], VOIDmode))
14253 operands[3] = force_reg (mode, operands[3]);
14255 emit_insn (compare_seq);
14256 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14257 gen_rtx_IF_THEN_ELSE (mode,
14258 compare_op, operands[2],
14261 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14262 gen_rtx_IF_THEN_ELSE (mode,
14264 copy_rtx (operands[3]),
14265 copy_rtx (operands[0]))));
14267 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14268 gen_rtx_IF_THEN_ELSE (mode,
14270 copy_rtx (operands[2]),
14271 copy_rtx (operands[0]))));
14273 return 1; /* DONE */
14276 /* Swap, force into registers, or otherwise massage the two operands
14277 to an sse comparison with a mask result. Thus we differ a bit from
14278 ix86_prepare_fp_compare_args which expects to produce a flags result.
14280 The DEST operand exists to help determine whether to commute commutative
14281 operators. The POP0/POP1 operands are updated in place. The new
14282 comparison code is returned, or UNKNOWN if not implementable. */
14284 static enum rtx_code
14285 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
14286 rtx *pop0, rtx *pop1)
14294 /* We have no LTGT as an operator. We could implement it with
14295 NE & ORDERED, but this requires an extra temporary. It's
14296 not clear that it's worth it. */
14303 /* These are supported directly. */
14310 /* For commutative operators, try to canonicalize the destination
14311 operand to be first in the comparison - this helps reload to
14312 avoid extra moves. */
14313 if (!dest || !rtx_equal_p (dest, *pop1))
14321 /* These are not supported directly. Swap the comparison operands
14322 to transform into something that is supported. */
14326 code = swap_condition (code);
14330 gcc_unreachable ();
14336 /* Detect conditional moves that exactly match min/max operational
14337 semantics. Note that this is IEEE safe, as long as we don't
14338 interchange the operands.
14340 Returns FALSE if this conditional move doesn't match a MIN/MAX,
14341 and TRUE if the operation is successful and instructions are emitted. */
14344 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
14345 rtx cmp_op1, rtx if_true, rtx if_false)
14347 enum machine_mode mode;
14353 else if (code == UNGE)
14356 if_true = if_false;
14362 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
14364 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
14369 mode = GET_MODE (dest);
14371 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
14372 but MODE may be a vector mode and thus not appropriate. */
14373 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
14375 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
14378 if_true = force_reg (mode, if_true);
14379 v = gen_rtvec (2, if_true, if_false);
14380 tmp = gen_rtx_UNSPEC (mode, v, u);
14384 code = is_min ? SMIN : SMAX;
14385 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
14388 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
14392 /* Expand an sse vector comparison. Return the register with the result. */
14395 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
14396 rtx op_true, rtx op_false)
14398 enum machine_mode mode = GET_MODE (dest);
14401 cmp_op0 = force_reg (mode, cmp_op0);
14402 if (!nonimmediate_operand (cmp_op1, mode))
14403 cmp_op1 = force_reg (mode, cmp_op1);
14406 || reg_overlap_mentioned_p (dest, op_true)
14407 || reg_overlap_mentioned_p (dest, op_false))
14408 dest = gen_reg_rtx (mode);
14410 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
14411 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14416 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
14417 operations. This is used for both scalar and vector conditional moves. */
14420 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
14422 enum machine_mode mode = GET_MODE (dest);
14425 if (op_false == CONST0_RTX (mode))
14427 op_true = force_reg (mode, op_true);
14428 x = gen_rtx_AND (mode, cmp, op_true);
14429 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14431 else if (op_true == CONST0_RTX (mode))
14433 op_false = force_reg (mode, op_false);
14434 x = gen_rtx_NOT (mode, cmp);
14435 x = gen_rtx_AND (mode, x, op_false);
14436 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14438 else if (TARGET_SSE5)
14440 rtx pcmov = gen_rtx_SET (mode, dest,
14441 gen_rtx_IF_THEN_ELSE (mode, cmp,
14448 op_true = force_reg (mode, op_true);
14449 op_false = force_reg (mode, op_false);
14451 t2 = gen_reg_rtx (mode);
14453 t3 = gen_reg_rtx (mode);
14457 x = gen_rtx_AND (mode, op_true, cmp);
14458 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
14460 x = gen_rtx_NOT (mode, cmp);
14461 x = gen_rtx_AND (mode, x, op_false);
14462 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
14464 x = gen_rtx_IOR (mode, t3, t2);
14465 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14469 /* Expand a floating-point conditional move. Return true if successful. */
14472 ix86_expand_fp_movcc (rtx operands[])
14474 enum machine_mode mode = GET_MODE (operands[0]);
14475 enum rtx_code code = GET_CODE (operands[1]);
14476 rtx tmp, compare_op, second_test, bypass_test;
14478 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
14480 enum machine_mode cmode;
14482 /* Since we've no cmove for sse registers, don't force bad register
14483 allocation just to gain access to it. Deny movcc when the
14484 comparison mode doesn't match the move mode. */
14485 cmode = GET_MODE (ix86_compare_op0);
14486 if (cmode == VOIDmode)
14487 cmode = GET_MODE (ix86_compare_op1);
14491 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
14493 &ix86_compare_op1);
14494 if (code == UNKNOWN)
14497 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
14498 ix86_compare_op1, operands[2],
14502 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
14503 ix86_compare_op1, operands[2], operands[3]);
14504 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
14508 /* The floating point conditional move instructions don't directly
14509 support conditions resulting from a signed integer comparison. */
14511 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14513 /* The floating point conditional move instructions don't directly
14514 support signed integer comparisons. */
14516 if (!fcmov_comparison_operator (compare_op, VOIDmode))
14518 gcc_assert (!second_test && !bypass_test);
14519 tmp = gen_reg_rtx (QImode);
14520 ix86_expand_setcc (code, tmp);
14522 ix86_compare_op0 = tmp;
14523 ix86_compare_op1 = const0_rtx;
14524 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14526 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14528 tmp = gen_reg_rtx (mode);
14529 emit_move_insn (tmp, operands[3]);
14532 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14534 tmp = gen_reg_rtx (mode);
14535 emit_move_insn (tmp, operands[2]);
14539 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14540 gen_rtx_IF_THEN_ELSE (mode, compare_op,
14541 operands[2], operands[3])));
14543 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14544 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
14545 operands[3], operands[0])));
14547 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14548 gen_rtx_IF_THEN_ELSE (mode, second_test,
14549 operands[2], operands[0])));
14554 /* Expand a floating-point vector conditional move; a vcond operation
14555 rather than a movcc operation. */
14558 ix86_expand_fp_vcond (rtx operands[])
14560 enum rtx_code code = GET_CODE (operands[3]);
14563 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
14564 &operands[4], &operands[5]);
14565 if (code == UNKNOWN)
14568 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
14569 operands[5], operands[1], operands[2]))
14572 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
14573 operands[1], operands[2]);
14574 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
14578 /* Expand a signed/unsigned integral vector conditional move. */
14581 ix86_expand_int_vcond (rtx operands[])
14583 enum machine_mode mode = GET_MODE (operands[0]);
14584 enum rtx_code code = GET_CODE (operands[3]);
14585 bool negate = false;
14588 cop0 = operands[4];
14589 cop1 = operands[5];
14591 /* SSE5 supports all of the comparisons on all vector int types. */
14594 /* Canonicalize the comparison to EQ, GT, GTU. */
14605 code = reverse_condition (code);
14611 code = reverse_condition (code);
14617 code = swap_condition (code);
14618 x = cop0, cop0 = cop1, cop1 = x;
14622 gcc_unreachable ();
14625 /* Only SSE4.1/SSE4.2 supports V2DImode. */
14626 if (mode == V2DImode)
14631 /* SSE4.1 supports EQ. */
14632 if (!TARGET_SSE4_1)
14638 /* SSE4.2 supports GT/GTU. */
14639 if (!TARGET_SSE4_2)
14644 gcc_unreachable ();
14648 /* Unsigned parallel compare is not supported by the hardware. Play some
14649 tricks to turn this into a signed comparison against 0. */
14652 cop0 = force_reg (mode, cop0);
14661 /* Perform a parallel modulo subtraction. */
14662 t1 = gen_reg_rtx (mode);
14663 emit_insn ((mode == V4SImode
14665 : gen_subv2di3) (t1, cop0, cop1));
14667 /* Extract the original sign bit of op0. */
14668 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
14670 t2 = gen_reg_rtx (mode);
14671 emit_insn ((mode == V4SImode
14673 : gen_andv2di3) (t2, cop0, mask));
14675 /* XOR it back into the result of the subtraction. This results
14676 in the sign bit set iff we saw unsigned underflow. */
14677 x = gen_reg_rtx (mode);
14678 emit_insn ((mode == V4SImode
14680 : gen_xorv2di3) (x, t1, t2));
14688 /* Perform a parallel unsigned saturating subtraction. */
14689 x = gen_reg_rtx (mode);
14690 emit_insn (gen_rtx_SET (VOIDmode, x,
14691 gen_rtx_US_MINUS (mode, cop0, cop1)));
14698 gcc_unreachable ();
14702 cop1 = CONST0_RTX (mode);
14706 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
14707 operands[1+negate], operands[2-negate]);
14709 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
14710 operands[2-negate]);
14714 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
14715 true if we should do zero extension, else sign extension. HIGH_P is
14716 true if we want the N/2 high elements, else the low elements. */
14719 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
14721 enum machine_mode imode = GET_MODE (operands[1]);
14722 rtx (*unpack)(rtx, rtx, rtx);
14729 unpack = gen_vec_interleave_highv16qi;
14731 unpack = gen_vec_interleave_lowv16qi;
14735 unpack = gen_vec_interleave_highv8hi;
14737 unpack = gen_vec_interleave_lowv8hi;
14741 unpack = gen_vec_interleave_highv4si;
14743 unpack = gen_vec_interleave_lowv4si;
14746 gcc_unreachable ();
14749 dest = gen_lowpart (imode, operands[0]);
14752 se = force_reg (imode, CONST0_RTX (imode));
14754 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
14755 operands[1], pc_rtx, pc_rtx);
14757 emit_insn (unpack (dest, operands[1], se));
14760 /* This function performs the same task as ix86_expand_sse_unpack,
14761 but with SSE4.1 instructions. */
14764 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
14766 enum machine_mode imode = GET_MODE (operands[1]);
14767 rtx (*unpack)(rtx, rtx);
14774 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
14776 unpack = gen_sse4_1_extendv8qiv8hi2;
14780 unpack = gen_sse4_1_zero_extendv4hiv4si2;
14782 unpack = gen_sse4_1_extendv4hiv4si2;
14786 unpack = gen_sse4_1_zero_extendv2siv2di2;
14788 unpack = gen_sse4_1_extendv2siv2di2;
14791 gcc_unreachable ();
14794 dest = operands[0];
14797 /* Shift higher 8 bytes to lower 8 bytes. */
14798 src = gen_reg_rtx (imode);
14799 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
14800 gen_lowpart (TImode, operands[1]),
14806 emit_insn (unpack (dest, src));
14809 /* This function performs the same task as ix86_expand_sse_unpack,
14810 but with sse5 instructions. */
14813 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
14815 enum machine_mode imode = GET_MODE (operands[1]);
14816 int pperm_bytes[16];
14818 int h = (high_p) ? 8 : 0;
14821 rtvec v = rtvec_alloc (16);
14824 rtx op0 = operands[0], op1 = operands[1];
14829 vs = rtvec_alloc (8);
14830 h2 = (high_p) ? 8 : 0;
14831 for (i = 0; i < 8; i++)
14833 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
14834 pperm_bytes[2*i+1] = ((unsigned_p)
14836 : PPERM_SIGN | PPERM_SRC2 | i | h);
14839 for (i = 0; i < 16; i++)
14840 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14842 for (i = 0; i < 8; i++)
14843 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
14845 p = gen_rtx_PARALLEL (VOIDmode, vs);
14846 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14848 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
14850 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
14854 vs = rtvec_alloc (4);
14855 h2 = (high_p) ? 4 : 0;
14856 for (i = 0; i < 4; i++)
14858 sign_extend = ((unsigned_p)
14860 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
14861 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
14862 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
14863 pperm_bytes[4*i+2] = sign_extend;
14864 pperm_bytes[4*i+3] = sign_extend;
14867 for (i = 0; i < 16; i++)
14868 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14870 for (i = 0; i < 4; i++)
14871 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
14873 p = gen_rtx_PARALLEL (VOIDmode, vs);
14874 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14876 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
14878 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
14882 vs = rtvec_alloc (2);
14883 h2 = (high_p) ? 2 : 0;
14884 for (i = 0; i < 2; i++)
14886 sign_extend = ((unsigned_p)
14888 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
14889 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
14890 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
14891 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
14892 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
14893 pperm_bytes[8*i+4] = sign_extend;
14894 pperm_bytes[8*i+5] = sign_extend;
14895 pperm_bytes[8*i+6] = sign_extend;
14896 pperm_bytes[8*i+7] = sign_extend;
14899 for (i = 0; i < 16; i++)
14900 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14902 for (i = 0; i < 2; i++)
14903 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
14905 p = gen_rtx_PARALLEL (VOIDmode, vs);
14906 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14908 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
14910 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
14914 gcc_unreachable ();
14920 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
14921 next narrower integer vector type */
14923 ix86_expand_sse5_pack (rtx operands[3])
14925 enum machine_mode imode = GET_MODE (operands[0]);
14926 int pperm_bytes[16];
14928 rtvec v = rtvec_alloc (16);
14930 rtx op0 = operands[0];
14931 rtx op1 = operands[1];
14932 rtx op2 = operands[2];
14937 for (i = 0; i < 8; i++)
14939 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
14940 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
14943 for (i = 0; i < 16; i++)
14944 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14946 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14947 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
14951 for (i = 0; i < 4; i++)
14953 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
14954 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
14955 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
14956 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
14959 for (i = 0; i < 16; i++)
14960 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14962 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14963 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
14967 for (i = 0; i < 2; i++)
14969 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
14970 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
14971 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
14972 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
14973 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
14974 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
14975 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
14976 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
14979 for (i = 0; i < 16; i++)
14980 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14982 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14983 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
14987 gcc_unreachable ();
14993 /* Expand conditional increment or decrement using adb/sbb instructions.
14994 The default case using setcc followed by the conditional move can be
14995 done by generic code. */
14997 ix86_expand_int_addcc (rtx operands[])
14999 enum rtx_code code = GET_CODE (operands[1]);
15001 rtx val = const0_rtx;
15002 bool fpcmp = false;
15003 enum machine_mode mode = GET_MODE (operands[0]);
15005 if (operands[3] != const1_rtx
15006 && operands[3] != constm1_rtx)
15008 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15009 ix86_compare_op1, &compare_op))
15011 code = GET_CODE (compare_op);
15013 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15014 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15017 code = ix86_fp_compare_code_to_integer (code);
15024 PUT_CODE (compare_op,
15025 reverse_condition_maybe_unordered
15026 (GET_CODE (compare_op)));
15028 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15030 PUT_MODE (compare_op, mode);
15032 /* Construct either adc or sbb insn. */
15033 if ((code == LTU) == (operands[3] == constm1_rtx))
15035 switch (GET_MODE (operands[0]))
15038 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15041 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15044 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15047 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15050 gcc_unreachable ();
15055 switch (GET_MODE (operands[0]))
15058 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15061 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15064 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15067 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15070 gcc_unreachable ();
15073 return 1; /* DONE */
15077 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
15078 works for floating pointer parameters and nonoffsetable memories.
15079 For pushes, it returns just stack offsets; the values will be saved
15080 in the right order. Maximally three parts are generated. */
15083 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
15088 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15090 size = (GET_MODE_SIZE (mode) + 4) / 8;
15092 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15093 gcc_assert (size >= 2 && size <= 4);
15095 /* Optimize constant pool reference to immediates. This is used by fp
15096 moves, that force all constants to memory to allow combining. */
15097 if (MEM_P (operand) && MEM_READONLY_P (operand))
15099 rtx tmp = maybe_get_pool_constant (operand);
15104 if (MEM_P (operand) && !offsettable_memref_p (operand))
15106 /* The only non-offsetable memories we handle are pushes. */
15107 int ok = push_operand (operand, VOIDmode);
15111 operand = copy_rtx (operand);
15112 PUT_MODE (operand, Pmode);
15113 parts[0] = parts[1] = parts[2] = parts[3] = operand;
15117 if (GET_CODE (operand) == CONST_VECTOR)
15119 enum machine_mode imode = int_mode_for_mode (mode);
15120 /* Caution: if we looked through a constant pool memory above,
15121 the operand may actually have a different mode now. That's
15122 ok, since we want to pun this all the way back to an integer. */
15123 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15124 gcc_assert (operand != NULL);
15130 if (mode == DImode)
15131 split_di (&operand, 1, &parts[0], &parts[1]);
15136 if (REG_P (operand))
15138 gcc_assert (reload_completed);
15139 for (i = 0; i < size; i++)
15140 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
15142 else if (offsettable_memref_p (operand))
15144 operand = adjust_address (operand, SImode, 0);
15145 parts[0] = operand;
15146 for (i = 1; i < size; i++)
15147 parts[i] = adjust_address (operand, SImode, 4 * i);
15149 else if (GET_CODE (operand) == CONST_DOUBLE)
15154 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15158 real_to_target (l, &r, mode);
15159 parts[3] = gen_int_mode (l[3], SImode);
15160 parts[2] = gen_int_mode (l[2], SImode);
15163 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15164 parts[2] = gen_int_mode (l[2], SImode);
15167 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15170 gcc_unreachable ();
15172 parts[1] = gen_int_mode (l[1], SImode);
15173 parts[0] = gen_int_mode (l[0], SImode);
15176 gcc_unreachable ();
15181 if (mode == TImode)
15182 split_ti (&operand, 1, &parts[0], &parts[1]);
15183 if (mode == XFmode || mode == TFmode)
15185 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15186 if (REG_P (operand))
15188 gcc_assert (reload_completed);
15189 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15190 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15192 else if (offsettable_memref_p (operand))
15194 operand = adjust_address (operand, DImode, 0);
15195 parts[0] = operand;
15196 parts[1] = adjust_address (operand, upper_mode, 8);
15198 else if (GET_CODE (operand) == CONST_DOUBLE)
15203 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15204 real_to_target (l, &r, mode);
15206 /* Do not use shift by 32 to avoid warning on 32bit systems. */
15207 if (HOST_BITS_PER_WIDE_INT >= 64)
15210 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
15211 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
15214 parts[0] = immed_double_const (l[0], l[1], DImode);
15216 if (upper_mode == SImode)
15217 parts[1] = gen_int_mode (l[2], SImode);
15218 else if (HOST_BITS_PER_WIDE_INT >= 64)
15221 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
15222 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
15225 parts[1] = immed_double_const (l[2], l[3], DImode);
15228 gcc_unreachable ();
15235 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
15236 Return false when normal moves are needed; true when all required
15237 insns have been emitted. Operands 2-4 contain the input values
15238 int the correct order; operands 5-7 contain the output values. */
15241 ix86_split_long_move (rtx operands[])
15246 int collisions = 0;
15247 enum machine_mode mode = GET_MODE (operands[0]);
15248 bool collisionparts[4];
15250 /* The DFmode expanders may ask us to move double.
15251 For 64bit target this is single move. By hiding the fact
15252 here we simplify i386.md splitters. */
15253 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
15255 /* Optimize constant pool reference to immediates. This is used by
15256 fp moves, that force all constants to memory to allow combining. */
15258 if (MEM_P (operands[1])
15259 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
15260 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
15261 operands[1] = get_pool_constant (XEXP (operands[1], 0));
15262 if (push_operand (operands[0], VOIDmode))
15264 operands[0] = copy_rtx (operands[0]);
15265 PUT_MODE (operands[0], Pmode);
15268 operands[0] = gen_lowpart (DImode, operands[0]);
15269 operands[1] = gen_lowpart (DImode, operands[1]);
15270 emit_move_insn (operands[0], operands[1]);
15274 /* The only non-offsettable memory we handle is push. */
15275 if (push_operand (operands[0], VOIDmode))
15278 gcc_assert (!MEM_P (operands[0])
15279 || offsettable_memref_p (operands[0]));
15281 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
15282 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
15284 /* When emitting push, take care for source operands on the stack. */
15285 if (push && MEM_P (operands[1])
15286 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
15287 for (i = 0; i < nparts - 1; i++)
15288 part[1][i] = change_address (part[1][i],
15289 GET_MODE (part[1][i]),
15290 XEXP (part[1][i + 1], 0));
15292 /* We need to do copy in the right order in case an address register
15293 of the source overlaps the destination. */
15294 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
15298 for (i = 0; i < nparts; i++)
15301 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
15302 if (collisionparts[i])
15306 /* Collision in the middle part can be handled by reordering. */
15307 if (collisions == 1 && nparts == 3 && collisionparts [1])
15309 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15310 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15312 else if (collisions == 1
15314 && (collisionparts [1] || collisionparts [2]))
15316 if (collisionparts [1])
15318 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15319 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15323 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
15324 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
15328 /* If there are more collisions, we can't handle it by reordering.
15329 Do an lea to the last part and use only one colliding move. */
15330 else if (collisions > 1)
15336 base = part[0][nparts - 1];
15338 /* Handle the case when the last part isn't valid for lea.
15339 Happens in 64-bit mode storing the 12-byte XFmode. */
15340 if (GET_MODE (base) != Pmode)
15341 base = gen_rtx_REG (Pmode, REGNO (base));
15343 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
15344 part[1][0] = replace_equiv_address (part[1][0], base);
15345 for (i = 1; i < nparts; i++)
15347 tmp = plus_constant (base, UNITS_PER_WORD * i);
15348 part[1][i] = replace_equiv_address (part[1][i], tmp);
15359 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
15360 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
15361 emit_move_insn (part[0][2], part[1][2]);
15363 else if (nparts == 4)
15365 emit_move_insn (part[0][3], part[1][3]);
15366 emit_move_insn (part[0][2], part[1][2]);
15371 /* In 64bit mode we don't have 32bit push available. In case this is
15372 register, it is OK - we will just use larger counterpart. We also
15373 retype memory - these comes from attempt to avoid REX prefix on
15374 moving of second half of TFmode value. */
15375 if (GET_MODE (part[1][1]) == SImode)
15377 switch (GET_CODE (part[1][1]))
15380 part[1][1] = adjust_address (part[1][1], DImode, 0);
15384 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
15388 gcc_unreachable ();
15391 if (GET_MODE (part[1][0]) == SImode)
15392 part[1][0] = part[1][1];
15395 emit_move_insn (part[0][1], part[1][1]);
15396 emit_move_insn (part[0][0], part[1][0]);
15400 /* Choose correct order to not overwrite the source before it is copied. */
15401 if ((REG_P (part[0][0])
15402 && REG_P (part[1][1])
15403 && (REGNO (part[0][0]) == REGNO (part[1][1])
15405 && REGNO (part[0][0]) == REGNO (part[1][2]))
15407 && REGNO (part[0][0]) == REGNO (part[1][3]))))
15409 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
15411 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
15413 operands[2 + i] = part[0][j];
15414 operands[6 + i] = part[1][j];
15419 for (i = 0; i < nparts; i++)
15421 operands[2 + i] = part[0][i];
15422 operands[6 + i] = part[1][i];
15426 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
15429 for (j = 0; j < nparts - 1; j++)
15430 if (CONST_INT_P (operands[6 + j])
15431 && operands[6 + j] != const0_rtx
15432 && REG_P (operands[2 + j]))
15433 for (i = j; i < nparts - 1; i++)
15434 if (CONST_INT_P (operands[7 + i])
15435 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
15436 operands[7 + i] = operands[2 + j];
15439 for (i = 0; i < nparts; i++)
15440 emit_move_insn (operands[2 + i], operands[6 + i]);
15445 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
15446 left shift by a constant, either using a single shift or
15447 a sequence of add instructions. */
15450 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
15454 emit_insn ((mode == DImode
15456 : gen_adddi3) (operand, operand, operand));
15458 else if (!optimize_size
15459 && count * ix86_cost->add <= ix86_cost->shift_const)
15462 for (i=0; i<count; i++)
15464 emit_insn ((mode == DImode
15466 : gen_adddi3) (operand, operand, operand));
15470 emit_insn ((mode == DImode
15472 : gen_ashldi3) (operand, operand, GEN_INT (count)));
15476 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
15478 rtx low[2], high[2];
15480 const int single_width = mode == DImode ? 32 : 64;
15482 if (CONST_INT_P (operands[2]))
15484 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
15485 count = INTVAL (operands[2]) & (single_width * 2 - 1);
15487 if (count >= single_width)
15489 emit_move_insn (high[0], low[1]);
15490 emit_move_insn (low[0], const0_rtx);
15492 if (count > single_width)
15493 ix86_expand_ashl_const (high[0], count - single_width, mode);
15497 if (!rtx_equal_p (operands[0], operands[1]))
15498 emit_move_insn (operands[0], operands[1]);
15499 emit_insn ((mode == DImode
15501 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
15502 ix86_expand_ashl_const (low[0], count, mode);
15507 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15509 if (operands[1] == const1_rtx)
15511 /* Assuming we've chosen a QImode capable registers, then 1 << N
15512 can be done with two 32/64-bit shifts, no branches, no cmoves. */
15513 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
15515 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
15517 ix86_expand_clear (low[0]);
15518 ix86_expand_clear (high[0]);
15519 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
15521 d = gen_lowpart (QImode, low[0]);
15522 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
15523 s = gen_rtx_EQ (QImode, flags, const0_rtx);
15524 emit_insn (gen_rtx_SET (VOIDmode, d, s));
15526 d = gen_lowpart (QImode, high[0]);
15527 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
15528 s = gen_rtx_NE (QImode, flags, const0_rtx);
15529 emit_insn (gen_rtx_SET (VOIDmode, d, s));
15532 /* Otherwise, we can get the same results by manually performing
15533 a bit extract operation on bit 5/6, and then performing the two
15534 shifts. The two methods of getting 0/1 into low/high are exactly
15535 the same size. Avoiding the shift in the bit extract case helps
15536 pentium4 a bit; no one else seems to care much either way. */
15541 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
15542 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
15544 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
15545 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
15547 emit_insn ((mode == DImode
15549 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
15550 emit_insn ((mode == DImode
15552 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
15553 emit_move_insn (low[0], high[0]);
15554 emit_insn ((mode == DImode
15556 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
15559 emit_insn ((mode == DImode
15561 : gen_ashldi3) (low[0], low[0], operands[2]));
15562 emit_insn ((mode == DImode
15564 : gen_ashldi3) (high[0], high[0], operands[2]));
15568 if (operands[1] == constm1_rtx)
15570 /* For -1 << N, we can avoid the shld instruction, because we
15571 know that we're shifting 0...31/63 ones into a -1. */
15572 emit_move_insn (low[0], constm1_rtx);
15574 emit_move_insn (high[0], low[0]);
15576 emit_move_insn (high[0], constm1_rtx);
15580 if (!rtx_equal_p (operands[0], operands[1]))
15581 emit_move_insn (operands[0], operands[1]);
15583 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15584 emit_insn ((mode == DImode
15586 : gen_x86_64_shld) (high[0], low[0], operands[2]));
15589 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
15591 if (TARGET_CMOVE && scratch)
15593 ix86_expand_clear (scratch);
15594 emit_insn ((mode == DImode
15595 ? gen_x86_shift_adj_1
15596 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
15600 emit_insn ((mode == DImode
15601 ? gen_x86_shift_adj_2
15602 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
15606 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
15608 rtx low[2], high[2];
15610 const int single_width = mode == DImode ? 32 : 64;
15612 if (CONST_INT_P (operands[2]))
15614 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
15615 count = INTVAL (operands[2]) & (single_width * 2 - 1);
15617 if (count == single_width * 2 - 1)
15619 emit_move_insn (high[0], high[1]);
15620 emit_insn ((mode == DImode
15622 : gen_ashrdi3) (high[0], high[0],
15623 GEN_INT (single_width - 1)));
15624 emit_move_insn (low[0], high[0]);
15627 else if (count >= single_width)
15629 emit_move_insn (low[0], high[1]);
15630 emit_move_insn (high[0], low[0]);
15631 emit_insn ((mode == DImode
15633 : gen_ashrdi3) (high[0], high[0],
15634 GEN_INT (single_width - 1)));
15635 if (count > single_width)
15636 emit_insn ((mode == DImode
15638 : gen_ashrdi3) (low[0], low[0],
15639 GEN_INT (count - single_width)));
15643 if (!rtx_equal_p (operands[0], operands[1]))
15644 emit_move_insn (operands[0], operands[1]);
15645 emit_insn ((mode == DImode
15647 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
15648 emit_insn ((mode == DImode
15650 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
15655 if (!rtx_equal_p (operands[0], operands[1]))
15656 emit_move_insn (operands[0], operands[1]);
15658 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15660 emit_insn ((mode == DImode
15662 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
15663 emit_insn ((mode == DImode
15665 : gen_ashrdi3) (high[0], high[0], operands[2]));
15667 if (TARGET_CMOVE && scratch)
15669 emit_move_insn (scratch, high[0]);
15670 emit_insn ((mode == DImode
15672 : gen_ashrdi3) (scratch, scratch,
15673 GEN_INT (single_width - 1)));
15674 emit_insn ((mode == DImode
15675 ? gen_x86_shift_adj_1
15676 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
15680 emit_insn ((mode == DImode
15681 ? gen_x86_shift_adj_3
15682 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
15687 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
15689 rtx low[2], high[2];
15691 const int single_width = mode == DImode ? 32 : 64;
15693 if (CONST_INT_P (operands[2]))
15695 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
15696 count = INTVAL (operands[2]) & (single_width * 2 - 1);
15698 if (count >= single_width)
15700 emit_move_insn (low[0], high[1]);
15701 ix86_expand_clear (high[0]);
15703 if (count > single_width)
15704 emit_insn ((mode == DImode
15706 : gen_lshrdi3) (low[0], low[0],
15707 GEN_INT (count - single_width)));
15711 if (!rtx_equal_p (operands[0], operands[1]))
15712 emit_move_insn (operands[0], operands[1]);
15713 emit_insn ((mode == DImode
15715 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
15716 emit_insn ((mode == DImode
15718 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
15723 if (!rtx_equal_p (operands[0], operands[1]))
15724 emit_move_insn (operands[0], operands[1]);
15726 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15728 emit_insn ((mode == DImode
15730 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
15731 emit_insn ((mode == DImode
15733 : gen_lshrdi3) (high[0], high[0], operands[2]));
15735 /* Heh. By reversing the arguments, we can reuse this pattern. */
15736 if (TARGET_CMOVE && scratch)
15738 ix86_expand_clear (scratch);
15739 emit_insn ((mode == DImode
15740 ? gen_x86_shift_adj_1
15741 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
15745 emit_insn ((mode == DImode
15746 ? gen_x86_shift_adj_2
15747 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
15751 /* Predict just emitted jump instruction to be taken with probability PROB. */
15753 predict_jump (int prob)
15755 rtx insn = get_last_insn ();
15756 gcc_assert (JUMP_P (insn));
15758 = gen_rtx_EXPR_LIST (REG_BR_PROB,
15763 /* Helper function for the string operations below. Dest VARIABLE whether
15764 it is aligned to VALUE bytes. If true, jump to the label. */
15766 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
15768 rtx label = gen_label_rtx ();
15769 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
15770 if (GET_MODE (variable) == DImode)
15771 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
15773 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
15774 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
15777 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15779 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15783 /* Adjust COUNTER by the VALUE. */
15785 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
15787 if (GET_MODE (countreg) == DImode)
15788 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
15790 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
15793 /* Zero extend possibly SImode EXP to Pmode register. */
15795 ix86_zero_extend_to_Pmode (rtx exp)
15798 if (GET_MODE (exp) == VOIDmode)
15799 return force_reg (Pmode, exp);
15800 if (GET_MODE (exp) == Pmode)
15801 return copy_to_mode_reg (Pmode, exp);
15802 r = gen_reg_rtx (Pmode);
15803 emit_insn (gen_zero_extendsidi2 (r, exp));
15807 /* Divide COUNTREG by SCALE. */
15809 scale_counter (rtx countreg, int scale)
15812 rtx piece_size_mask;
15816 if (CONST_INT_P (countreg))
15817 return GEN_INT (INTVAL (countreg) / scale);
15818 gcc_assert (REG_P (countreg));
15820 piece_size_mask = GEN_INT (scale - 1);
15821 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
15822 GEN_INT (exact_log2 (scale)),
15823 NULL, 1, OPTAB_DIRECT);
15827 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
15828 DImode for constant loop counts. */
15830 static enum machine_mode
15831 counter_mode (rtx count_exp)
15833 if (GET_MODE (count_exp) != VOIDmode)
15834 return GET_MODE (count_exp);
15835 if (GET_CODE (count_exp) != CONST_INT)
15837 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
15842 /* When SRCPTR is non-NULL, output simple loop to move memory
15843 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
15844 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
15845 equivalent loop to set memory by VALUE (supposed to be in MODE).
15847 The size is rounded down to whole number of chunk size moved at once.
15848 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
15852 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
15853 rtx destptr, rtx srcptr, rtx value,
15854 rtx count, enum machine_mode mode, int unroll,
15857 rtx out_label, top_label, iter, tmp;
15858 enum machine_mode iter_mode = counter_mode (count);
15859 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
15860 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
15866 top_label = gen_label_rtx ();
15867 out_label = gen_label_rtx ();
15868 iter = gen_reg_rtx (iter_mode);
15870 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
15871 NULL, 1, OPTAB_DIRECT);
15872 /* Those two should combine. */
15873 if (piece_size == const1_rtx)
15875 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
15877 predict_jump (REG_BR_PROB_BASE * 10 / 100);
15879 emit_move_insn (iter, const0_rtx);
15881 emit_label (top_label);
15883 tmp = convert_modes (Pmode, iter_mode, iter, true);
15884 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
15885 destmem = change_address (destmem, mode, x_addr);
15889 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
15890 srcmem = change_address (srcmem, mode, y_addr);
15892 /* When unrolling for chips that reorder memory reads and writes,
15893 we can save registers by using single temporary.
15894 Also using 4 temporaries is overkill in 32bit mode. */
15895 if (!TARGET_64BIT && 0)
15897 for (i = 0; i < unroll; i++)
15902 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
15904 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
15906 emit_move_insn (destmem, srcmem);
15912 gcc_assert (unroll <= 4);
15913 for (i = 0; i < unroll; i++)
15915 tmpreg[i] = gen_reg_rtx (mode);
15919 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
15921 emit_move_insn (tmpreg[i], srcmem);
15923 for (i = 0; i < unroll; i++)
15928 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
15930 emit_move_insn (destmem, tmpreg[i]);
15935 for (i = 0; i < unroll; i++)
15939 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
15940 emit_move_insn (destmem, value);
15943 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
15944 true, OPTAB_LIB_WIDEN);
15946 emit_move_insn (iter, tmp);
15948 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
15950 if (expected_size != -1)
15952 expected_size /= GET_MODE_SIZE (mode) * unroll;
15953 if (expected_size == 0)
15955 else if (expected_size > REG_BR_PROB_BASE)
15956 predict_jump (REG_BR_PROB_BASE - 1);
15958 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
15961 predict_jump (REG_BR_PROB_BASE * 80 / 100);
15962 iter = ix86_zero_extend_to_Pmode (iter);
15963 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
15964 true, OPTAB_LIB_WIDEN);
15965 if (tmp != destptr)
15966 emit_move_insn (destptr, tmp);
15969 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
15970 true, OPTAB_LIB_WIDEN);
15972 emit_move_insn (srcptr, tmp);
15974 emit_label (out_label);
15977 /* Output "rep; mov" instruction.
15978 Arguments have same meaning as for previous function */
15980 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
15981 rtx destptr, rtx srcptr,
15983 enum machine_mode mode)
15989 /* If the size is known, it is shorter to use rep movs. */
15990 if (mode == QImode && CONST_INT_P (count)
15991 && !(INTVAL (count) & 3))
15994 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15995 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15996 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
15997 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
15998 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
15999 if (mode != QImode)
16001 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16002 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16003 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16004 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16005 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16006 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16010 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16011 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16013 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16017 /* Output "rep; stos" instruction.
16018 Arguments have same meaning as for previous function */
16020 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16022 enum machine_mode mode)
16027 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16028 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16029 value = force_reg (mode, gen_lowpart (mode, value));
16030 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16031 if (mode != QImode)
16033 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16034 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16035 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16038 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16039 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
16043 emit_strmov (rtx destmem, rtx srcmem,
16044 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16046 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16047 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16048 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16051 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
16053 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16054 rtx destptr, rtx srcptr, rtx count, int max_size)
16057 if (CONST_INT_P (count))
16059 HOST_WIDE_INT countval = INTVAL (count);
16062 if ((countval & 0x10) && max_size > 16)
16066 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16067 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16070 gcc_unreachable ();
16073 if ((countval & 0x08) && max_size > 8)
16076 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16079 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16080 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16084 if ((countval & 0x04) && max_size > 4)
16086 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16089 if ((countval & 0x02) && max_size > 2)
16091 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16094 if ((countval & 0x01) && max_size > 1)
16096 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
16103 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16104 count, 1, OPTAB_DIRECT);
16105 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16106 count, QImode, 1, 4);
16110 /* When there are stringops, we can cheaply increase dest and src pointers.
16111 Otherwise we save code size by maintaining offset (zero is readily
16112 available from preceding rep operation) and using x86 addressing modes.
16114 if (TARGET_SINGLE_STRINGOP)
16118 rtx label = ix86_expand_aligntest (count, 4, true);
16119 src = change_address (srcmem, SImode, srcptr);
16120 dest = change_address (destmem, SImode, destptr);
16121 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16122 emit_label (label);
16123 LABEL_NUSES (label) = 1;
16127 rtx label = ix86_expand_aligntest (count, 2, true);
16128 src = change_address (srcmem, HImode, srcptr);
16129 dest = change_address (destmem, HImode, destptr);
16130 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16131 emit_label (label);
16132 LABEL_NUSES (label) = 1;
16136 rtx label = ix86_expand_aligntest (count, 1, true);
16137 src = change_address (srcmem, QImode, srcptr);
16138 dest = change_address (destmem, QImode, destptr);
16139 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16140 emit_label (label);
16141 LABEL_NUSES (label) = 1;
16146 rtx offset = force_reg (Pmode, const0_rtx);
16151 rtx label = ix86_expand_aligntest (count, 4, true);
16152 src = change_address (srcmem, SImode, srcptr);
16153 dest = change_address (destmem, SImode, destptr);
16154 emit_move_insn (dest, src);
16155 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16156 true, OPTAB_LIB_WIDEN);
16158 emit_move_insn (offset, tmp);
16159 emit_label (label);
16160 LABEL_NUSES (label) = 1;
16164 rtx label = ix86_expand_aligntest (count, 2, true);
16165 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16166 src = change_address (srcmem, HImode, tmp);
16167 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16168 dest = change_address (destmem, HImode, tmp);
16169 emit_move_insn (dest, src);
16170 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16171 true, OPTAB_LIB_WIDEN);
16173 emit_move_insn (offset, tmp);
16174 emit_label (label);
16175 LABEL_NUSES (label) = 1;
16179 rtx label = ix86_expand_aligntest (count, 1, true);
16180 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16181 src = change_address (srcmem, QImode, tmp);
16182 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16183 dest = change_address (destmem, QImode, tmp);
16184 emit_move_insn (dest, src);
16185 emit_label (label);
16186 LABEL_NUSES (label) = 1;
16191 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
16193 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
16194 rtx count, int max_size)
16197 expand_simple_binop (counter_mode (count), AND, count,
16198 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
16199 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
16200 gen_lowpart (QImode, value), count, QImode,
16204 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
16206 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
16210 if (CONST_INT_P (count))
16212 HOST_WIDE_INT countval = INTVAL (count);
16215 if ((countval & 0x10) && max_size > 16)
16219 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16220 emit_insn (gen_strset (destptr, dest, value));
16221 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
16222 emit_insn (gen_strset (destptr, dest, value));
16225 gcc_unreachable ();
16228 if ((countval & 0x08) && max_size > 8)
16232 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16233 emit_insn (gen_strset (destptr, dest, value));
16237 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16238 emit_insn (gen_strset (destptr, dest, value));
16239 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
16240 emit_insn (gen_strset (destptr, dest, value));
16244 if ((countval & 0x04) && max_size > 4)
16246 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16247 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16250 if ((countval & 0x02) && max_size > 2)
16252 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
16253 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16256 if ((countval & 0x01) && max_size > 1)
16258 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
16259 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16266 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
16271 rtx label = ix86_expand_aligntest (count, 16, true);
16274 dest = change_address (destmem, DImode, destptr);
16275 emit_insn (gen_strset (destptr, dest, value));
16276 emit_insn (gen_strset (destptr, dest, value));
16280 dest = change_address (destmem, SImode, destptr);
16281 emit_insn (gen_strset (destptr, dest, value));
16282 emit_insn (gen_strset (destptr, dest, value));
16283 emit_insn (gen_strset (destptr, dest, value));
16284 emit_insn (gen_strset (destptr, dest, value));
16286 emit_label (label);
16287 LABEL_NUSES (label) = 1;
16291 rtx label = ix86_expand_aligntest (count, 8, true);
16294 dest = change_address (destmem, DImode, destptr);
16295 emit_insn (gen_strset (destptr, dest, value));
16299 dest = change_address (destmem, SImode, destptr);
16300 emit_insn (gen_strset (destptr, dest, value));
16301 emit_insn (gen_strset (destptr, dest, value));
16303 emit_label (label);
16304 LABEL_NUSES (label) = 1;
16308 rtx label = ix86_expand_aligntest (count, 4, true);
16309 dest = change_address (destmem, SImode, destptr);
16310 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16311 emit_label (label);
16312 LABEL_NUSES (label) = 1;
16316 rtx label = ix86_expand_aligntest (count, 2, true);
16317 dest = change_address (destmem, HImode, destptr);
16318 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16319 emit_label (label);
16320 LABEL_NUSES (label) = 1;
16324 rtx label = ix86_expand_aligntest (count, 1, true);
16325 dest = change_address (destmem, QImode, destptr);
16326 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16327 emit_label (label);
16328 LABEL_NUSES (label) = 1;
16332 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
16333 DESIRED_ALIGNMENT. */
16335 expand_movmem_prologue (rtx destmem, rtx srcmem,
16336 rtx destptr, rtx srcptr, rtx count,
16337 int align, int desired_alignment)
16339 if (align <= 1 && desired_alignment > 1)
16341 rtx label = ix86_expand_aligntest (destptr, 1, false);
16342 srcmem = change_address (srcmem, QImode, srcptr);
16343 destmem = change_address (destmem, QImode, destptr);
16344 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16345 ix86_adjust_counter (count, 1);
16346 emit_label (label);
16347 LABEL_NUSES (label) = 1;
16349 if (align <= 2 && desired_alignment > 2)
16351 rtx label = ix86_expand_aligntest (destptr, 2, false);
16352 srcmem = change_address (srcmem, HImode, srcptr);
16353 destmem = change_address (destmem, HImode, destptr);
16354 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16355 ix86_adjust_counter (count, 2);
16356 emit_label (label);
16357 LABEL_NUSES (label) = 1;
16359 if (align <= 4 && desired_alignment > 4)
16361 rtx label = ix86_expand_aligntest (destptr, 4, false);
16362 srcmem = change_address (srcmem, SImode, srcptr);
16363 destmem = change_address (destmem, SImode, destptr);
16364 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16365 ix86_adjust_counter (count, 4);
16366 emit_label (label);
16367 LABEL_NUSES (label) = 1;
16369 gcc_assert (desired_alignment <= 8);
16372 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
16373 DESIRED_ALIGNMENT. */
16375 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
16376 int align, int desired_alignment)
16378 if (align <= 1 && desired_alignment > 1)
16380 rtx label = ix86_expand_aligntest (destptr, 1, false);
16381 destmem = change_address (destmem, QImode, destptr);
16382 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
16383 ix86_adjust_counter (count, 1);
16384 emit_label (label);
16385 LABEL_NUSES (label) = 1;
16387 if (align <= 2 && desired_alignment > 2)
16389 rtx label = ix86_expand_aligntest (destptr, 2, false);
16390 destmem = change_address (destmem, HImode, destptr);
16391 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
16392 ix86_adjust_counter (count, 2);
16393 emit_label (label);
16394 LABEL_NUSES (label) = 1;
16396 if (align <= 4 && desired_alignment > 4)
16398 rtx label = ix86_expand_aligntest (destptr, 4, false);
16399 destmem = change_address (destmem, SImode, destptr);
16400 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
16401 ix86_adjust_counter (count, 4);
16402 emit_label (label);
16403 LABEL_NUSES (label) = 1;
16405 gcc_assert (desired_alignment <= 8);
16408 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
16409 static enum stringop_alg
16410 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
16411 int *dynamic_check)
16413 const struct stringop_algs * algs;
16414 /* Algorithms using the rep prefix want at least edi and ecx;
16415 additionally, memset wants eax and memcpy wants esi. Don't
16416 consider such algorithms if the user has appropriated those
16417 registers for their own purposes. */
16418 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
16420 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
16422 #define ALG_USABLE_P(alg) (rep_prefix_usable \
16423 || (alg != rep_prefix_1_byte \
16424 && alg != rep_prefix_4_byte \
16425 && alg != rep_prefix_8_byte))
16427 *dynamic_check = -1;
16429 algs = &ix86_cost->memset[TARGET_64BIT != 0];
16431 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
16432 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
16433 return stringop_alg;
16434 /* rep; movq or rep; movl is the smallest variant. */
16435 else if (optimize_size)
16437 if (!count || (count & 3))
16438 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
16440 return rep_prefix_usable ? rep_prefix_4_byte : loop;
16442 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
16444 else if (expected_size != -1 && expected_size < 4)
16445 return loop_1_byte;
16446 else if (expected_size != -1)
16449 enum stringop_alg alg = libcall;
16450 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
16452 /* We get here if the algorithms that were not libcall-based
16453 were rep-prefix based and we are unable to use rep prefixes
16454 based on global register usage. Break out of the loop and
16455 use the heuristic below. */
16456 if (algs->size[i].max == 0)
16458 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
16460 enum stringop_alg candidate = algs->size[i].alg;
16462 if (candidate != libcall && ALG_USABLE_P (candidate))
16464 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
16465 last non-libcall inline algorithm. */
16466 if (TARGET_INLINE_ALL_STRINGOPS)
16468 /* When the current size is best to be copied by a libcall,
16469 but we are still forced to inline, run the heuristic below
16470 that will pick code for medium sized blocks. */
16471 if (alg != libcall)
16475 else if (ALG_USABLE_P (candidate))
16479 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
16481 /* When asked to inline the call anyway, try to pick meaningful choice.
16482 We look for maximal size of block that is faster to copy by hand and
16483 take blocks of at most of that size guessing that average size will
16484 be roughly half of the block.
16486 If this turns out to be bad, we might simply specify the preferred
16487 choice in ix86_costs. */
16488 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
16489 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
16492 enum stringop_alg alg;
16494 bool any_alg_usable_p = true;
16496 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
16498 enum stringop_alg candidate = algs->size[i].alg;
16499 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
16501 if (candidate != libcall && candidate
16502 && ALG_USABLE_P (candidate))
16503 max = algs->size[i].max;
16505 /* If there aren't any usable algorithms, then recursing on
16506 smaller sizes isn't going to find anything. Just return the
16507 simple byte-at-a-time copy loop. */
16508 if (!any_alg_usable_p)
16510 /* Pick something reasonable. */
16511 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
16512 *dynamic_check = 128;
16513 return loop_1_byte;
16517 alg = decide_alg (count, max / 2, memset, dynamic_check);
16518 gcc_assert (*dynamic_check == -1);
16519 gcc_assert (alg != libcall);
16520 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
16521 *dynamic_check = max;
16524 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
16525 #undef ALG_USABLE_P
16528 /* Decide on alignment. We know that the operand is already aligned to ALIGN
16529 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
16531 decide_alignment (int align,
16532 enum stringop_alg alg,
16535 int desired_align = 0;
16539 gcc_unreachable ();
16541 case unrolled_loop:
16542 desired_align = GET_MODE_SIZE (Pmode);
16544 case rep_prefix_8_byte:
16547 case rep_prefix_4_byte:
16548 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
16549 copying whole cacheline at once. */
16550 if (TARGET_PENTIUMPRO)
16555 case rep_prefix_1_byte:
16556 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
16557 copying whole cacheline at once. */
16558 if (TARGET_PENTIUMPRO)
16572 if (desired_align < align)
16573 desired_align = align;
16574 if (expected_size != -1 && expected_size < 4)
16575 desired_align = align;
16576 return desired_align;
16579 /* Return the smallest power of 2 greater than VAL. */
16581 smallest_pow2_greater_than (int val)
16589 /* Expand string move (memcpy) operation. Use i386 string operations when
16590 profitable. expand_setmem contains similar code. The code depends upon
16591 architecture, block size and alignment, but always has the same
16594 1) Prologue guard: Conditional that jumps up to epilogues for small
16595 blocks that can be handled by epilogue alone. This is faster but
16596 also needed for correctness, since prologue assume the block is larger
16597 than the desired alignment.
16599 Optional dynamic check for size and libcall for large
16600 blocks is emitted here too, with -minline-stringops-dynamically.
16602 2) Prologue: copy first few bytes in order to get destination aligned
16603 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
16604 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
16605 We emit either a jump tree on power of two sized blocks, or a byte loop.
16607 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
16608 with specified algorithm.
16610 4) Epilogue: code copying tail of the block that is too small to be
16611 handled by main body (or up to size guarded by prologue guard). */
16614 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
16615 rtx expected_align_exp, rtx expected_size_exp)
16621 rtx jump_around_label = NULL;
16622 HOST_WIDE_INT align = 1;
16623 unsigned HOST_WIDE_INT count = 0;
16624 HOST_WIDE_INT expected_size = -1;
16625 int size_needed = 0, epilogue_size_needed;
16626 int desired_align = 0;
16627 enum stringop_alg alg;
16630 if (CONST_INT_P (align_exp))
16631 align = INTVAL (align_exp);
16632 /* i386 can do misaligned access on reasonably increased cost. */
16633 if (CONST_INT_P (expected_align_exp)
16634 && INTVAL (expected_align_exp) > align)
16635 align = INTVAL (expected_align_exp);
16636 if (CONST_INT_P (count_exp))
16637 count = expected_size = INTVAL (count_exp);
16638 if (CONST_INT_P (expected_size_exp) && count == 0)
16639 expected_size = INTVAL (expected_size_exp);
16641 /* Make sure we don't need to care about overflow later on. */
16642 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
16645 /* Step 0: Decide on preferred algorithm, desired alignment and
16646 size of chunks to be copied by main loop. */
16648 alg = decide_alg (count, expected_size, false, &dynamic_check);
16649 desired_align = decide_alignment (align, alg, expected_size);
16651 if (!TARGET_ALIGN_STRINGOPS)
16652 align = desired_align;
16654 if (alg == libcall)
16656 gcc_assert (alg != no_stringop);
16658 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
16659 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16660 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
16665 gcc_unreachable ();
16667 size_needed = GET_MODE_SIZE (Pmode);
16669 case unrolled_loop:
16670 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
16672 case rep_prefix_8_byte:
16675 case rep_prefix_4_byte:
16678 case rep_prefix_1_byte:
16684 epilogue_size_needed = size_needed;
16686 /* Step 1: Prologue guard. */
16688 /* Alignment code needs count to be in register. */
16689 if (CONST_INT_P (count_exp) && desired_align > align)
16690 count_exp = force_reg (counter_mode (count_exp), count_exp);
16691 gcc_assert (desired_align >= 1 && align >= 1);
16693 /* Ensure that alignment prologue won't copy past end of block. */
16694 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
16696 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
16697 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
16698 Make sure it is power of 2. */
16699 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
16701 if (CONST_INT_P (count_exp))
16703 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
16708 label = gen_label_rtx ();
16709 emit_cmp_and_jump_insns (count_exp,
16710 GEN_INT (epilogue_size_needed),
16711 LTU, 0, counter_mode (count_exp), 1, label);
16712 if (expected_size == -1 || expected_size < epilogue_size_needed)
16713 predict_jump (REG_BR_PROB_BASE * 60 / 100);
16715 predict_jump (REG_BR_PROB_BASE * 20 / 100);
16719 /* Emit code to decide on runtime whether library call or inline should be
16721 if (dynamic_check != -1)
16723 if (CONST_INT_P (count_exp))
16725 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
16727 emit_block_move_via_libcall (dst, src, count_exp, false);
16728 count_exp = const0_rtx;
16734 rtx hot_label = gen_label_rtx ();
16735 jump_around_label = gen_label_rtx ();
16736 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
16737 LEU, 0, GET_MODE (count_exp), 1, hot_label);
16738 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16739 emit_block_move_via_libcall (dst, src, count_exp, false);
16740 emit_jump (jump_around_label);
16741 emit_label (hot_label);
16745 /* Step 2: Alignment prologue. */
16747 if (desired_align > align)
16749 /* Except for the first move in epilogue, we no longer know
16750 constant offset in aliasing info. It don't seems to worth
16751 the pain to maintain it for the first move, so throw away
16753 src = change_address (src, BLKmode, srcreg);
16754 dst = change_address (dst, BLKmode, destreg);
16755 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
16758 if (label && size_needed == 1)
16760 emit_label (label);
16761 LABEL_NUSES (label) = 1;
16765 /* Step 3: Main loop. */
16771 gcc_unreachable ();
16773 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
16774 count_exp, QImode, 1, expected_size);
16777 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
16778 count_exp, Pmode, 1, expected_size);
16780 case unrolled_loop:
16781 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
16782 registers for 4 temporaries anyway. */
16783 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
16784 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
16787 case rep_prefix_8_byte:
16788 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
16791 case rep_prefix_4_byte:
16792 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
16795 case rep_prefix_1_byte:
16796 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
16800 /* Adjust properly the offset of src and dest memory for aliasing. */
16801 if (CONST_INT_P (count_exp))
16803 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
16804 (count / size_needed) * size_needed);
16805 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16806 (count / size_needed) * size_needed);
16810 src = change_address (src, BLKmode, srcreg);
16811 dst = change_address (dst, BLKmode, destreg);
16814 /* Step 4: Epilogue to copy the remaining bytes. */
16818 /* When the main loop is done, COUNT_EXP might hold original count,
16819 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16820 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16821 bytes. Compensate if needed. */
16823 if (size_needed < epilogue_size_needed)
16826 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16827 GEN_INT (size_needed - 1), count_exp, 1,
16829 if (tmp != count_exp)
16830 emit_move_insn (count_exp, tmp);
16832 emit_label (label);
16833 LABEL_NUSES (label) = 1;
16836 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16837 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
16838 epilogue_size_needed);
16839 if (jump_around_label)
16840 emit_label (jump_around_label);
16844 /* Helper function for memcpy. For QImode value 0xXY produce
16845 0xXYXYXYXY of wide specified by MODE. This is essentially
16846 a * 0x10101010, but we can do slightly better than
16847 synth_mult by unwinding the sequence by hand on CPUs with
16850 promote_duplicated_reg (enum machine_mode mode, rtx val)
16852 enum machine_mode valmode = GET_MODE (val);
16854 int nops = mode == DImode ? 3 : 2;
16856 gcc_assert (mode == SImode || mode == DImode);
16857 if (val == const0_rtx)
16858 return copy_to_mode_reg (mode, const0_rtx);
16859 if (CONST_INT_P (val))
16861 HOST_WIDE_INT v = INTVAL (val) & 255;
16865 if (mode == DImode)
16866 v |= (v << 16) << 16;
16867 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
16870 if (valmode == VOIDmode)
16872 if (valmode != QImode)
16873 val = gen_lowpart (QImode, val);
16874 if (mode == QImode)
16876 if (!TARGET_PARTIAL_REG_STALL)
16878 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
16879 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
16880 <= (ix86_cost->shift_const + ix86_cost->add) * nops
16881 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
16883 rtx reg = convert_modes (mode, QImode, val, true);
16884 tmp = promote_duplicated_reg (mode, const1_rtx);
16885 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
16890 rtx reg = convert_modes (mode, QImode, val, true);
16892 if (!TARGET_PARTIAL_REG_STALL)
16893 if (mode == SImode)
16894 emit_insn (gen_movsi_insv_1 (reg, reg));
16896 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
16899 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
16900 NULL, 1, OPTAB_DIRECT);
16902 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
16904 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
16905 NULL, 1, OPTAB_DIRECT);
16906 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
16907 if (mode == SImode)
16909 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
16910 NULL, 1, OPTAB_DIRECT);
16911 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
16916 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
16917 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
16918 alignment from ALIGN to DESIRED_ALIGN. */
16920 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
16925 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
16926 promoted_val = promote_duplicated_reg (DImode, val);
16927 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
16928 promoted_val = promote_duplicated_reg (SImode, val);
16929 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
16930 promoted_val = promote_duplicated_reg (HImode, val);
16932 promoted_val = val;
16934 return promoted_val;
16937 /* Expand string clear operation (bzero). Use i386 string operations when
16938 profitable. See expand_movmem comment for explanation of individual
16939 steps performed. */
16941 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
16942 rtx expected_align_exp, rtx expected_size_exp)
16947 rtx jump_around_label = NULL;
16948 HOST_WIDE_INT align = 1;
16949 unsigned HOST_WIDE_INT count = 0;
16950 HOST_WIDE_INT expected_size = -1;
16951 int size_needed = 0, epilogue_size_needed;
16952 int desired_align = 0;
16953 enum stringop_alg alg;
16954 rtx promoted_val = NULL;
16955 bool force_loopy_epilogue = false;
16958 if (CONST_INT_P (align_exp))
16959 align = INTVAL (align_exp);
16960 /* i386 can do misaligned access on reasonably increased cost. */
16961 if (CONST_INT_P (expected_align_exp)
16962 && INTVAL (expected_align_exp) > align)
16963 align = INTVAL (expected_align_exp);
16964 if (CONST_INT_P (count_exp))
16965 count = expected_size = INTVAL (count_exp);
16966 if (CONST_INT_P (expected_size_exp) && count == 0)
16967 expected_size = INTVAL (expected_size_exp);
16969 /* Make sure we don't need to care about overflow later on. */
16970 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
16973 /* Step 0: Decide on preferred algorithm, desired alignment and
16974 size of chunks to be copied by main loop. */
16976 alg = decide_alg (count, expected_size, true, &dynamic_check);
16977 desired_align = decide_alignment (align, alg, expected_size);
16979 if (!TARGET_ALIGN_STRINGOPS)
16980 align = desired_align;
16982 if (alg == libcall)
16984 gcc_assert (alg != no_stringop);
16986 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
16987 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16992 gcc_unreachable ();
16994 size_needed = GET_MODE_SIZE (Pmode);
16996 case unrolled_loop:
16997 size_needed = GET_MODE_SIZE (Pmode) * 4;
16999 case rep_prefix_8_byte:
17002 case rep_prefix_4_byte:
17005 case rep_prefix_1_byte:
17010 epilogue_size_needed = size_needed;
17012 /* Step 1: Prologue guard. */
17014 /* Alignment code needs count to be in register. */
17015 if (CONST_INT_P (count_exp) && desired_align > align)
17017 enum machine_mode mode = SImode;
17018 if (TARGET_64BIT && (count & ~0xffffffff))
17020 count_exp = force_reg (mode, count_exp);
17022 /* Do the cheap promotion to allow better CSE across the
17023 main loop and epilogue (ie one load of the big constant in the
17024 front of all code. */
17025 if (CONST_INT_P (val_exp))
17026 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17027 desired_align, align);
17028 /* Ensure that alignment prologue won't copy past end of block. */
17029 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17031 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17032 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17033 Make sure it is power of 2. */
17034 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17036 /* To improve performance of small blocks, we jump around the VAL
17037 promoting mode. This mean that if the promoted VAL is not constant,
17038 we might not use it in the epilogue and have to use byte
17040 if (epilogue_size_needed > 2 && !promoted_val)
17041 force_loopy_epilogue = true;
17042 label = gen_label_rtx ();
17043 emit_cmp_and_jump_insns (count_exp,
17044 GEN_INT (epilogue_size_needed),
17045 LTU, 0, counter_mode (count_exp), 1, label);
17046 if (GET_CODE (count_exp) == CONST_INT)
17048 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
17049 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17051 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17053 if (dynamic_check != -1)
17055 rtx hot_label = gen_label_rtx ();
17056 jump_around_label = gen_label_rtx ();
17057 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17058 LEU, 0, counter_mode (count_exp), 1, hot_label);
17059 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17060 set_storage_via_libcall (dst, count_exp, val_exp, false);
17061 emit_jump (jump_around_label);
17062 emit_label (hot_label);
17065 /* Step 2: Alignment prologue. */
17067 /* Do the expensive promotion once we branched off the small blocks. */
17069 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17070 desired_align, align);
17071 gcc_assert (desired_align >= 1 && align >= 1);
17073 if (desired_align > align)
17075 /* Except for the first move in epilogue, we no longer know
17076 constant offset in aliasing info. It don't seems to worth
17077 the pain to maintain it for the first move, so throw away
17079 dst = change_address (dst, BLKmode, destreg);
17080 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
17083 if (label && size_needed == 1)
17085 emit_label (label);
17086 LABEL_NUSES (label) = 1;
17090 /* Step 3: Main loop. */
17096 gcc_unreachable ();
17098 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17099 count_exp, QImode, 1, expected_size);
17102 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17103 count_exp, Pmode, 1, expected_size);
17105 case unrolled_loop:
17106 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17107 count_exp, Pmode, 4, expected_size);
17109 case rep_prefix_8_byte:
17110 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17113 case rep_prefix_4_byte:
17114 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17117 case rep_prefix_1_byte:
17118 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17122 /* Adjust properly the offset of src and dest memory for aliasing. */
17123 if (CONST_INT_P (count_exp))
17124 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17125 (count / size_needed) * size_needed);
17127 dst = change_address (dst, BLKmode, destreg);
17129 /* Step 4: Epilogue to copy the remaining bytes. */
17133 /* When the main loop is done, COUNT_EXP might hold original count,
17134 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17135 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17136 bytes. Compensate if needed. */
17138 if (size_needed < desired_align - align)
17141 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17142 GEN_INT (size_needed - 1), count_exp, 1,
17144 size_needed = desired_align - align + 1;
17145 if (tmp != count_exp)
17146 emit_move_insn (count_exp, tmp);
17148 emit_label (label);
17149 LABEL_NUSES (label) = 1;
17151 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17153 if (force_loopy_epilogue)
17154 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
17157 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
17160 if (jump_around_label)
17161 emit_label (jump_around_label);
17165 /* Expand the appropriate insns for doing strlen if not just doing
17168 out = result, initialized with the start address
17169 align_rtx = alignment of the address.
17170 scratch = scratch register, initialized with the startaddress when
17171 not aligned, otherwise undefined
17173 This is just the body. It needs the initializations mentioned above and
17174 some address computing at the end. These things are done in i386.md. */
17177 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
17181 rtx align_2_label = NULL_RTX;
17182 rtx align_3_label = NULL_RTX;
17183 rtx align_4_label = gen_label_rtx ();
17184 rtx end_0_label = gen_label_rtx ();
17186 rtx tmpreg = gen_reg_rtx (SImode);
17187 rtx scratch = gen_reg_rtx (SImode);
17191 if (CONST_INT_P (align_rtx))
17192 align = INTVAL (align_rtx);
17194 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
17196 /* Is there a known alignment and is it less than 4? */
17199 rtx scratch1 = gen_reg_rtx (Pmode);
17200 emit_move_insn (scratch1, out);
17201 /* Is there a known alignment and is it not 2? */
17204 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
17205 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
17207 /* Leave just the 3 lower bits. */
17208 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
17209 NULL_RTX, 0, OPTAB_WIDEN);
17211 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17212 Pmode, 1, align_4_label);
17213 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
17214 Pmode, 1, align_2_label);
17215 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
17216 Pmode, 1, align_3_label);
17220 /* Since the alignment is 2, we have to check 2 or 0 bytes;
17221 check if is aligned to 4 - byte. */
17223 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
17224 NULL_RTX, 0, OPTAB_WIDEN);
17226 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17227 Pmode, 1, align_4_label);
17230 mem = change_address (src, QImode, out);
17232 /* Now compare the bytes. */
17234 /* Compare the first n unaligned byte on a byte per byte basis. */
17235 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
17236 QImode, 1, end_0_label);
17238 /* Increment the address. */
17239 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17241 /* Not needed with an alignment of 2 */
17244 emit_label (align_2_label);
17246 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17249 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17251 emit_label (align_3_label);
17254 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17257 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17260 /* Generate loop to check 4 bytes at a time. It is not a good idea to
17261 align this loop. It gives only huge programs, but does not help to
17263 emit_label (align_4_label);
17265 mem = change_address (src, SImode, out);
17266 emit_move_insn (scratch, mem);
17267 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
17269 /* This formula yields a nonzero result iff one of the bytes is zero.
17270 This saves three branches inside loop and many cycles. */
17272 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
17273 emit_insn (gen_one_cmplsi2 (scratch, scratch));
17274 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
17275 emit_insn (gen_andsi3 (tmpreg, tmpreg,
17276 gen_int_mode (0x80808080, SImode)));
17277 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
17282 rtx reg = gen_reg_rtx (SImode);
17283 rtx reg2 = gen_reg_rtx (Pmode);
17284 emit_move_insn (reg, tmpreg);
17285 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
17287 /* If zero is not in the first two bytes, move two bytes forward. */
17288 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17289 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17290 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17291 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
17292 gen_rtx_IF_THEN_ELSE (SImode, tmp,
17295 /* Emit lea manually to avoid clobbering of flags. */
17296 emit_insn (gen_rtx_SET (SImode, reg2,
17297 gen_rtx_PLUS (Pmode, out, const2_rtx)));
17299 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17300 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17301 emit_insn (gen_rtx_SET (VOIDmode, out,
17302 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
17309 rtx end_2_label = gen_label_rtx ();
17310 /* Is zero in the first two bytes? */
17312 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17313 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17314 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
17315 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17316 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
17318 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17319 JUMP_LABEL (tmp) = end_2_label;
17321 /* Not in the first two. Move two bytes forward. */
17322 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
17323 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
17325 emit_label (end_2_label);
17329 /* Avoid branch in fixing the byte. */
17330 tmpreg = gen_lowpart (QImode, tmpreg);
17331 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
17332 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
17333 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
17335 emit_label (end_0_label);
17338 /* Expand strlen. */
17341 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
17343 rtx addr, scratch1, scratch2, scratch3, scratch4;
17345 /* The generic case of strlen expander is long. Avoid it's
17346 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
17348 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17349 && !TARGET_INLINE_ALL_STRINGOPS
17351 && (!CONST_INT_P (align) || INTVAL (align) < 4))
17354 addr = force_reg (Pmode, XEXP (src, 0));
17355 scratch1 = gen_reg_rtx (Pmode);
17357 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17360 /* Well it seems that some optimizer does not combine a call like
17361 foo(strlen(bar), strlen(bar));
17362 when the move and the subtraction is done here. It does calculate
17363 the length just once when these instructions are done inside of
17364 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
17365 often used and I use one fewer register for the lifetime of
17366 output_strlen_unroll() this is better. */
17368 emit_move_insn (out, addr);
17370 ix86_expand_strlensi_unroll_1 (out, src, align);
17372 /* strlensi_unroll_1 returns the address of the zero at the end of
17373 the string, like memchr(), so compute the length by subtracting
17374 the start address. */
17375 emit_insn ((*ix86_gen_sub3) (out, out, addr));
17381 /* Can't use this if the user has appropriated eax, ecx, or edi. */
17382 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
17385 scratch2 = gen_reg_rtx (Pmode);
17386 scratch3 = gen_reg_rtx (Pmode);
17387 scratch4 = force_reg (Pmode, constm1_rtx);
17389 emit_move_insn (scratch3, addr);
17390 eoschar = force_reg (QImode, eoschar);
17392 src = replace_equiv_address_nv (src, scratch3);
17394 /* If .md starts supporting :P, this can be done in .md. */
17395 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
17396 scratch4), UNSPEC_SCAS);
17397 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
17398 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
17399 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
17404 /* For given symbol (function) construct code to compute address of it's PLT
17405 entry in large x86-64 PIC model. */
17407 construct_plt_address (rtx symbol)
17409 rtx tmp = gen_reg_rtx (Pmode);
17410 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
17412 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
17413 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
17415 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
17416 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
17421 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
17422 rtx callarg2 ATTRIBUTE_UNUSED,
17423 rtx pop, int sibcall)
17425 rtx use = NULL, call;
17427 if (pop == const0_rtx)
17429 gcc_assert (!TARGET_64BIT || !pop);
17431 if (TARGET_MACHO && !TARGET_64BIT)
17434 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
17435 fnaddr = machopic_indirect_call_target (fnaddr);
17440 /* Static functions and indirect calls don't need the pic register. */
17441 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
17442 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
17443 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
17444 use_reg (&use, pic_offset_table_rtx);
17447 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
17449 rtx al = gen_rtx_REG (QImode, AX_REG);
17450 emit_move_insn (al, callarg2);
17451 use_reg (&use, al);
17454 if (ix86_cmodel == CM_LARGE_PIC
17455 && GET_CODE (fnaddr) == MEM
17456 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
17457 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
17458 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
17459 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
17461 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
17462 fnaddr = gen_rtx_MEM (QImode, fnaddr);
17464 if (sibcall && TARGET_64BIT
17465 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
17468 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
17469 fnaddr = gen_rtx_REG (Pmode, R11_REG);
17470 emit_move_insn (fnaddr, addr);
17471 fnaddr = gen_rtx_MEM (QImode, fnaddr);
17474 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
17476 call = gen_rtx_SET (VOIDmode, retval, call);
17479 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
17480 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
17481 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
17484 call = emit_call_insn (call);
17486 CALL_INSN_FUNCTION_USAGE (call) = use;
17490 /* Clear stack slot assignments remembered from previous functions.
17491 This is called from INIT_EXPANDERS once before RTL is emitted for each
17494 static struct machine_function *
17495 ix86_init_machine_status (void)
17497 struct machine_function *f;
17499 f = GGC_CNEW (struct machine_function);
17500 f->use_fast_prologue_epilogue_nregs = -1;
17501 f->tls_descriptor_call_expanded_p = 0;
17502 f->call_abi = DEFAULT_ABI;
17507 /* Return a MEM corresponding to a stack slot with mode MODE.
17508 Allocate a new slot if necessary.
17510 The RTL for a function can have several slots available: N is
17511 which slot to use. */
17514 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
17516 struct stack_local_entry *s;
17518 gcc_assert (n < MAX_386_STACK_LOCALS);
17520 /* Virtual slot is valid only before vregs are instantiated. */
17521 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
17523 for (s = ix86_stack_locals; s; s = s->next)
17524 if (s->mode == mode && s->n == n)
17525 return copy_rtx (s->rtl);
17527 s = (struct stack_local_entry *)
17528 ggc_alloc (sizeof (struct stack_local_entry));
17531 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
17533 s->next = ix86_stack_locals;
17534 ix86_stack_locals = s;
17538 /* Construct the SYMBOL_REF for the tls_get_addr function. */
17540 static GTY(()) rtx ix86_tls_symbol;
17542 ix86_tls_get_addr (void)
17545 if (!ix86_tls_symbol)
17547 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
17548 (TARGET_ANY_GNU_TLS
17550 ? "___tls_get_addr"
17551 : "__tls_get_addr");
17554 return ix86_tls_symbol;
17557 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
17559 static GTY(()) rtx ix86_tls_module_base_symbol;
17561 ix86_tls_module_base (void)
17564 if (!ix86_tls_module_base_symbol)
17566 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
17567 "_TLS_MODULE_BASE_");
17568 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
17569 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
17572 return ix86_tls_module_base_symbol;
17575 /* Calculate the length of the memory address in the instruction
17576 encoding. Does not include the one-byte modrm, opcode, or prefix. */
17579 memory_address_length (rtx addr)
17581 struct ix86_address parts;
17582 rtx base, index, disp;
17586 if (GET_CODE (addr) == PRE_DEC
17587 || GET_CODE (addr) == POST_INC
17588 || GET_CODE (addr) == PRE_MODIFY
17589 || GET_CODE (addr) == POST_MODIFY)
17592 ok = ix86_decompose_address (addr, &parts);
17595 if (parts.base && GET_CODE (parts.base) == SUBREG)
17596 parts.base = SUBREG_REG (parts.base);
17597 if (parts.index && GET_CODE (parts.index) == SUBREG)
17598 parts.index = SUBREG_REG (parts.index);
17601 index = parts.index;
17606 - esp as the base always wants an index,
17607 - ebp as the base always wants a displacement. */
17609 /* Register Indirect. */
17610 if (base && !index && !disp)
17612 /* esp (for its index) and ebp (for its displacement) need
17613 the two-byte modrm form. */
17614 if (addr == stack_pointer_rtx
17615 || addr == arg_pointer_rtx
17616 || addr == frame_pointer_rtx
17617 || addr == hard_frame_pointer_rtx)
17621 /* Direct Addressing. */
17622 else if (disp && !base && !index)
17627 /* Find the length of the displacement constant. */
17630 if (base && satisfies_constraint_K (disp))
17635 /* ebp always wants a displacement. */
17636 else if (base == hard_frame_pointer_rtx)
17639 /* An index requires the two-byte modrm form.... */
17641 /* ...like esp, which always wants an index. */
17642 || base == stack_pointer_rtx
17643 || base == arg_pointer_rtx
17644 || base == frame_pointer_rtx)
17651 /* Compute default value for "length_immediate" attribute. When SHORTFORM
17652 is set, expect that insn have 8bit immediate alternative. */
17654 ix86_attr_length_immediate_default (rtx insn, int shortform)
17658 extract_insn_cached (insn);
17659 for (i = recog_data.n_operands - 1; i >= 0; --i)
17660 if (CONSTANT_P (recog_data.operand[i]))
17663 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
17667 switch (get_attr_mode (insn))
17678 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
17683 fatal_insn ("unknown insn mode", insn);
17689 /* Compute default value for "length_address" attribute. */
17691 ix86_attr_length_address_default (rtx insn)
17695 if (get_attr_type (insn) == TYPE_LEA)
17697 rtx set = PATTERN (insn);
17699 if (GET_CODE (set) == PARALLEL)
17700 set = XVECEXP (set, 0, 0);
17702 gcc_assert (GET_CODE (set) == SET);
17704 return memory_address_length (SET_SRC (set));
17707 extract_insn_cached (insn);
17708 for (i = recog_data.n_operands - 1; i >= 0; --i)
17709 if (MEM_P (recog_data.operand[i]))
17711 return memory_address_length (XEXP (recog_data.operand[i], 0));
17717 /* Return the maximum number of instructions a cpu can issue. */
17720 ix86_issue_rate (void)
17724 case PROCESSOR_PENTIUM:
17728 case PROCESSOR_PENTIUMPRO:
17729 case PROCESSOR_PENTIUM4:
17730 case PROCESSOR_ATHLON:
17732 case PROCESSOR_AMDFAM10:
17733 case PROCESSOR_NOCONA:
17734 case PROCESSOR_GENERIC32:
17735 case PROCESSOR_GENERIC64:
17738 case PROCESSOR_CORE2:
17746 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
17747 by DEP_INSN and nothing set by DEP_INSN. */
17750 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
17754 /* Simplify the test for uninteresting insns. */
17755 if (insn_type != TYPE_SETCC
17756 && insn_type != TYPE_ICMOV
17757 && insn_type != TYPE_FCMOV
17758 && insn_type != TYPE_IBR)
17761 if ((set = single_set (dep_insn)) != 0)
17763 set = SET_DEST (set);
17766 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
17767 && XVECLEN (PATTERN (dep_insn), 0) == 2
17768 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
17769 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
17771 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
17772 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
17777 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
17780 /* This test is true if the dependent insn reads the flags but
17781 not any other potentially set register. */
17782 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
17785 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
17791 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
17792 address with operands set by DEP_INSN. */
17795 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
17799 if (insn_type == TYPE_LEA
17802 addr = PATTERN (insn);
17804 if (GET_CODE (addr) == PARALLEL)
17805 addr = XVECEXP (addr, 0, 0);
17807 gcc_assert (GET_CODE (addr) == SET);
17809 addr = SET_SRC (addr);
17814 extract_insn_cached (insn);
17815 for (i = recog_data.n_operands - 1; i >= 0; --i)
17816 if (MEM_P (recog_data.operand[i]))
17818 addr = XEXP (recog_data.operand[i], 0);
17825 return modified_in_p (addr, dep_insn);
17829 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
17831 enum attr_type insn_type, dep_insn_type;
17832 enum attr_memory memory;
17834 int dep_insn_code_number;
17836 /* Anti and output dependencies have zero cost on all CPUs. */
17837 if (REG_NOTE_KIND (link) != 0)
17840 dep_insn_code_number = recog_memoized (dep_insn);
17842 /* If we can't recognize the insns, we can't really do anything. */
17843 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
17846 insn_type = get_attr_type (insn);
17847 dep_insn_type = get_attr_type (dep_insn);
17851 case PROCESSOR_PENTIUM:
17852 /* Address Generation Interlock adds a cycle of latency. */
17853 if (ix86_agi_dependent (insn, dep_insn, insn_type))
17856 /* ??? Compares pair with jump/setcc. */
17857 if (ix86_flags_dependent (insn, dep_insn, insn_type))
17860 /* Floating point stores require value to be ready one cycle earlier. */
17861 if (insn_type == TYPE_FMOV
17862 && get_attr_memory (insn) == MEMORY_STORE
17863 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17867 case PROCESSOR_PENTIUMPRO:
17868 memory = get_attr_memory (insn);
17870 /* INT->FP conversion is expensive. */
17871 if (get_attr_fp_int_src (dep_insn))
17874 /* There is one cycle extra latency between an FP op and a store. */
17875 if (insn_type == TYPE_FMOV
17876 && (set = single_set (dep_insn)) != NULL_RTX
17877 && (set2 = single_set (insn)) != NULL_RTX
17878 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
17879 && MEM_P (SET_DEST (set2)))
17882 /* Show ability of reorder buffer to hide latency of load by executing
17883 in parallel with previous instruction in case
17884 previous instruction is not needed to compute the address. */
17885 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
17886 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17888 /* Claim moves to take one cycle, as core can issue one load
17889 at time and the next load can start cycle later. */
17890 if (dep_insn_type == TYPE_IMOV
17891 || dep_insn_type == TYPE_FMOV)
17899 memory = get_attr_memory (insn);
17901 /* The esp dependency is resolved before the instruction is really
17903 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
17904 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
17907 /* INT->FP conversion is expensive. */
17908 if (get_attr_fp_int_src (dep_insn))
17911 /* Show ability of reorder buffer to hide latency of load by executing
17912 in parallel with previous instruction in case
17913 previous instruction is not needed to compute the address. */
17914 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
17915 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17917 /* Claim moves to take one cycle, as core can issue one load
17918 at time and the next load can start cycle later. */
17919 if (dep_insn_type == TYPE_IMOV
17920 || dep_insn_type == TYPE_FMOV)
17929 case PROCESSOR_ATHLON:
17931 case PROCESSOR_AMDFAM10:
17932 case PROCESSOR_GENERIC32:
17933 case PROCESSOR_GENERIC64:
17934 memory = get_attr_memory (insn);
17936 /* Show ability of reorder buffer to hide latency of load by executing
17937 in parallel with previous instruction in case
17938 previous instruction is not needed to compute the address. */
17939 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
17940 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17942 enum attr_unit unit = get_attr_unit (insn);
17945 /* Because of the difference between the length of integer and
17946 floating unit pipeline preparation stages, the memory operands
17947 for floating point are cheaper.
17949 ??? For Athlon it the difference is most probably 2. */
17950 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
17953 loadcost = TARGET_ATHLON ? 2 : 0;
17955 if (cost >= loadcost)
17968 /* How many alternative schedules to try. This should be as wide as the
17969 scheduling freedom in the DFA, but no wider. Making this value too
17970 large results extra work for the scheduler. */
17973 ia32_multipass_dfa_lookahead (void)
17977 case PROCESSOR_PENTIUM:
17980 case PROCESSOR_PENTIUMPRO:
17990 /* Compute the alignment given to a constant that is being placed in memory.
17991 EXP is the constant and ALIGN is the alignment that the object would
17993 The value of this function is used instead of that alignment to align
17997 ix86_constant_alignment (tree exp, int align)
17999 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18000 || TREE_CODE (exp) == INTEGER_CST)
18002 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
18004 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
18007 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18008 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18009 return BITS_PER_WORD;
18014 /* Compute the alignment for a static variable.
18015 TYPE is the data type, and ALIGN is the alignment that
18016 the object would ordinarily have. The value of this function is used
18017 instead of that alignment to align the object. */
18020 ix86_data_alignment (tree type, int align)
18022 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
18024 if (AGGREGATE_TYPE_P (type)
18025 && TYPE_SIZE (type)
18026 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18027 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
18028 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
18029 && align < max_align)
18032 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18033 to 16byte boundary. */
18036 if (AGGREGATE_TYPE_P (type)
18037 && TYPE_SIZE (type)
18038 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18039 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
18040 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
18044 if (TREE_CODE (type) == ARRAY_TYPE)
18046 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18048 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18051 else if (TREE_CODE (type) == COMPLEX_TYPE)
18054 if (TYPE_MODE (type) == DCmode && align < 64)
18056 if ((TYPE_MODE (type) == XCmode
18057 || TYPE_MODE (type) == TCmode) && align < 128)
18060 else if ((TREE_CODE (type) == RECORD_TYPE
18061 || TREE_CODE (type) == UNION_TYPE
18062 || TREE_CODE (type) == QUAL_UNION_TYPE)
18063 && TYPE_FIELDS (type))
18065 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18067 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18070 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18071 || TREE_CODE (type) == INTEGER_TYPE)
18073 if (TYPE_MODE (type) == DFmode && align < 64)
18075 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18082 /* Compute the alignment for a local variable or a stack slot. TYPE is
18083 the data type, MODE is the widest mode available and ALIGN is the
18084 alignment that the object would ordinarily have. The value of this
18085 macro is used instead of that alignment to align the object. */
18088 ix86_local_alignment (tree type, enum machine_mode mode,
18089 unsigned int align)
18091 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18092 register in MODE. We will return the largest alignment of XF
18096 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18097 align = GET_MODE_ALIGNMENT (DFmode);
18101 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18102 to 16byte boundary. */
18105 if (AGGREGATE_TYPE_P (type)
18106 && TYPE_SIZE (type)
18107 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18108 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
18109 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
18112 if (TREE_CODE (type) == ARRAY_TYPE)
18114 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18116 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18119 else if (TREE_CODE (type) == COMPLEX_TYPE)
18121 if (TYPE_MODE (type) == DCmode && align < 64)
18123 if ((TYPE_MODE (type) == XCmode
18124 || TYPE_MODE (type) == TCmode) && align < 128)
18127 else if ((TREE_CODE (type) == RECORD_TYPE
18128 || TREE_CODE (type) == UNION_TYPE
18129 || TREE_CODE (type) == QUAL_UNION_TYPE)
18130 && TYPE_FIELDS (type))
18132 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18134 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18137 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18138 || TREE_CODE (type) == INTEGER_TYPE)
18141 if (TYPE_MODE (type) == DFmode && align < 64)
18143 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18149 /* Emit RTL insns to initialize the variable parts of a trampoline.
18150 FNADDR is an RTX for the address of the function's pure code.
18151 CXT is an RTX for the static chain value for the function. */
18153 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
18157 /* Compute offset from the end of the jmp to the target function. */
18158 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
18159 plus_constant (tramp, 10),
18160 NULL_RTX, 1, OPTAB_DIRECT);
18161 emit_move_insn (gen_rtx_MEM (QImode, tramp),
18162 gen_int_mode (0xb9, QImode));
18163 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
18164 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
18165 gen_int_mode (0xe9, QImode));
18166 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
18171 /* Try to load address using shorter movl instead of movabs.
18172 We may want to support movq for kernel mode, but kernel does not use
18173 trampolines at the moment. */
18174 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18176 fnaddr = copy_to_mode_reg (DImode, fnaddr);
18177 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18178 gen_int_mode (0xbb41, HImode));
18179 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
18180 gen_lowpart (SImode, fnaddr));
18185 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18186 gen_int_mode (0xbb49, HImode));
18187 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18191 /* Load static chain using movabs to r10. */
18192 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18193 gen_int_mode (0xba49, HImode));
18194 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18197 /* Jump to the r11 */
18198 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18199 gen_int_mode (0xff49, HImode));
18200 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
18201 gen_int_mode (0xe3, QImode));
18203 gcc_assert (offset <= TRAMPOLINE_SIZE);
18206 #ifdef ENABLE_EXECUTE_STACK
18207 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18208 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
18212 /* Codes for all the SSE/MMX builtins. */
18215 IX86_BUILTIN_ADDPS,
18216 IX86_BUILTIN_ADDSS,
18217 IX86_BUILTIN_DIVPS,
18218 IX86_BUILTIN_DIVSS,
18219 IX86_BUILTIN_MULPS,
18220 IX86_BUILTIN_MULSS,
18221 IX86_BUILTIN_SUBPS,
18222 IX86_BUILTIN_SUBSS,
18224 IX86_BUILTIN_CMPEQPS,
18225 IX86_BUILTIN_CMPLTPS,
18226 IX86_BUILTIN_CMPLEPS,
18227 IX86_BUILTIN_CMPGTPS,
18228 IX86_BUILTIN_CMPGEPS,
18229 IX86_BUILTIN_CMPNEQPS,
18230 IX86_BUILTIN_CMPNLTPS,
18231 IX86_BUILTIN_CMPNLEPS,
18232 IX86_BUILTIN_CMPNGTPS,
18233 IX86_BUILTIN_CMPNGEPS,
18234 IX86_BUILTIN_CMPORDPS,
18235 IX86_BUILTIN_CMPUNORDPS,
18236 IX86_BUILTIN_CMPEQSS,
18237 IX86_BUILTIN_CMPLTSS,
18238 IX86_BUILTIN_CMPLESS,
18239 IX86_BUILTIN_CMPNEQSS,
18240 IX86_BUILTIN_CMPNLTSS,
18241 IX86_BUILTIN_CMPNLESS,
18242 IX86_BUILTIN_CMPNGTSS,
18243 IX86_BUILTIN_CMPNGESS,
18244 IX86_BUILTIN_CMPORDSS,
18245 IX86_BUILTIN_CMPUNORDSS,
18247 IX86_BUILTIN_COMIEQSS,
18248 IX86_BUILTIN_COMILTSS,
18249 IX86_BUILTIN_COMILESS,
18250 IX86_BUILTIN_COMIGTSS,
18251 IX86_BUILTIN_COMIGESS,
18252 IX86_BUILTIN_COMINEQSS,
18253 IX86_BUILTIN_UCOMIEQSS,
18254 IX86_BUILTIN_UCOMILTSS,
18255 IX86_BUILTIN_UCOMILESS,
18256 IX86_BUILTIN_UCOMIGTSS,
18257 IX86_BUILTIN_UCOMIGESS,
18258 IX86_BUILTIN_UCOMINEQSS,
18260 IX86_BUILTIN_CVTPI2PS,
18261 IX86_BUILTIN_CVTPS2PI,
18262 IX86_BUILTIN_CVTSI2SS,
18263 IX86_BUILTIN_CVTSI642SS,
18264 IX86_BUILTIN_CVTSS2SI,
18265 IX86_BUILTIN_CVTSS2SI64,
18266 IX86_BUILTIN_CVTTPS2PI,
18267 IX86_BUILTIN_CVTTSS2SI,
18268 IX86_BUILTIN_CVTTSS2SI64,
18270 IX86_BUILTIN_MAXPS,
18271 IX86_BUILTIN_MAXSS,
18272 IX86_BUILTIN_MINPS,
18273 IX86_BUILTIN_MINSS,
18275 IX86_BUILTIN_LOADUPS,
18276 IX86_BUILTIN_STOREUPS,
18277 IX86_BUILTIN_MOVSS,
18279 IX86_BUILTIN_MOVHLPS,
18280 IX86_BUILTIN_MOVLHPS,
18281 IX86_BUILTIN_LOADHPS,
18282 IX86_BUILTIN_LOADLPS,
18283 IX86_BUILTIN_STOREHPS,
18284 IX86_BUILTIN_STORELPS,
18286 IX86_BUILTIN_MASKMOVQ,
18287 IX86_BUILTIN_MOVMSKPS,
18288 IX86_BUILTIN_PMOVMSKB,
18290 IX86_BUILTIN_MOVNTPS,
18291 IX86_BUILTIN_MOVNTQ,
18293 IX86_BUILTIN_LOADDQU,
18294 IX86_BUILTIN_STOREDQU,
18296 IX86_BUILTIN_PACKSSWB,
18297 IX86_BUILTIN_PACKSSDW,
18298 IX86_BUILTIN_PACKUSWB,
18300 IX86_BUILTIN_PADDB,
18301 IX86_BUILTIN_PADDW,
18302 IX86_BUILTIN_PADDD,
18303 IX86_BUILTIN_PADDQ,
18304 IX86_BUILTIN_PADDSB,
18305 IX86_BUILTIN_PADDSW,
18306 IX86_BUILTIN_PADDUSB,
18307 IX86_BUILTIN_PADDUSW,
18308 IX86_BUILTIN_PSUBB,
18309 IX86_BUILTIN_PSUBW,
18310 IX86_BUILTIN_PSUBD,
18311 IX86_BUILTIN_PSUBQ,
18312 IX86_BUILTIN_PSUBSB,
18313 IX86_BUILTIN_PSUBSW,
18314 IX86_BUILTIN_PSUBUSB,
18315 IX86_BUILTIN_PSUBUSW,
18318 IX86_BUILTIN_PANDN,
18322 IX86_BUILTIN_PAVGB,
18323 IX86_BUILTIN_PAVGW,
18325 IX86_BUILTIN_PCMPEQB,
18326 IX86_BUILTIN_PCMPEQW,
18327 IX86_BUILTIN_PCMPEQD,
18328 IX86_BUILTIN_PCMPGTB,
18329 IX86_BUILTIN_PCMPGTW,
18330 IX86_BUILTIN_PCMPGTD,
18332 IX86_BUILTIN_PMADDWD,
18334 IX86_BUILTIN_PMAXSW,
18335 IX86_BUILTIN_PMAXUB,
18336 IX86_BUILTIN_PMINSW,
18337 IX86_BUILTIN_PMINUB,
18339 IX86_BUILTIN_PMULHUW,
18340 IX86_BUILTIN_PMULHW,
18341 IX86_BUILTIN_PMULLW,
18343 IX86_BUILTIN_PSADBW,
18344 IX86_BUILTIN_PSHUFW,
18346 IX86_BUILTIN_PSLLW,
18347 IX86_BUILTIN_PSLLD,
18348 IX86_BUILTIN_PSLLQ,
18349 IX86_BUILTIN_PSRAW,
18350 IX86_BUILTIN_PSRAD,
18351 IX86_BUILTIN_PSRLW,
18352 IX86_BUILTIN_PSRLD,
18353 IX86_BUILTIN_PSRLQ,
18354 IX86_BUILTIN_PSLLWI,
18355 IX86_BUILTIN_PSLLDI,
18356 IX86_BUILTIN_PSLLQI,
18357 IX86_BUILTIN_PSRAWI,
18358 IX86_BUILTIN_PSRADI,
18359 IX86_BUILTIN_PSRLWI,
18360 IX86_BUILTIN_PSRLDI,
18361 IX86_BUILTIN_PSRLQI,
18363 IX86_BUILTIN_PUNPCKHBW,
18364 IX86_BUILTIN_PUNPCKHWD,
18365 IX86_BUILTIN_PUNPCKHDQ,
18366 IX86_BUILTIN_PUNPCKLBW,
18367 IX86_BUILTIN_PUNPCKLWD,
18368 IX86_BUILTIN_PUNPCKLDQ,
18370 IX86_BUILTIN_SHUFPS,
18372 IX86_BUILTIN_RCPPS,
18373 IX86_BUILTIN_RCPSS,
18374 IX86_BUILTIN_RSQRTPS,
18375 IX86_BUILTIN_RSQRTPS_NR,
18376 IX86_BUILTIN_RSQRTSS,
18377 IX86_BUILTIN_RSQRTF,
18378 IX86_BUILTIN_SQRTPS,
18379 IX86_BUILTIN_SQRTPS_NR,
18380 IX86_BUILTIN_SQRTSS,
18382 IX86_BUILTIN_UNPCKHPS,
18383 IX86_BUILTIN_UNPCKLPS,
18385 IX86_BUILTIN_ANDPS,
18386 IX86_BUILTIN_ANDNPS,
18388 IX86_BUILTIN_XORPS,
18391 IX86_BUILTIN_LDMXCSR,
18392 IX86_BUILTIN_STMXCSR,
18393 IX86_BUILTIN_SFENCE,
18395 /* 3DNow! Original */
18396 IX86_BUILTIN_FEMMS,
18397 IX86_BUILTIN_PAVGUSB,
18398 IX86_BUILTIN_PF2ID,
18399 IX86_BUILTIN_PFACC,
18400 IX86_BUILTIN_PFADD,
18401 IX86_BUILTIN_PFCMPEQ,
18402 IX86_BUILTIN_PFCMPGE,
18403 IX86_BUILTIN_PFCMPGT,
18404 IX86_BUILTIN_PFMAX,
18405 IX86_BUILTIN_PFMIN,
18406 IX86_BUILTIN_PFMUL,
18407 IX86_BUILTIN_PFRCP,
18408 IX86_BUILTIN_PFRCPIT1,
18409 IX86_BUILTIN_PFRCPIT2,
18410 IX86_BUILTIN_PFRSQIT1,
18411 IX86_BUILTIN_PFRSQRT,
18412 IX86_BUILTIN_PFSUB,
18413 IX86_BUILTIN_PFSUBR,
18414 IX86_BUILTIN_PI2FD,
18415 IX86_BUILTIN_PMULHRW,
18417 /* 3DNow! Athlon Extensions */
18418 IX86_BUILTIN_PF2IW,
18419 IX86_BUILTIN_PFNACC,
18420 IX86_BUILTIN_PFPNACC,
18421 IX86_BUILTIN_PI2FW,
18422 IX86_BUILTIN_PSWAPDSI,
18423 IX86_BUILTIN_PSWAPDSF,
18426 IX86_BUILTIN_ADDPD,
18427 IX86_BUILTIN_ADDSD,
18428 IX86_BUILTIN_DIVPD,
18429 IX86_BUILTIN_DIVSD,
18430 IX86_BUILTIN_MULPD,
18431 IX86_BUILTIN_MULSD,
18432 IX86_BUILTIN_SUBPD,
18433 IX86_BUILTIN_SUBSD,
18435 IX86_BUILTIN_CMPEQPD,
18436 IX86_BUILTIN_CMPLTPD,
18437 IX86_BUILTIN_CMPLEPD,
18438 IX86_BUILTIN_CMPGTPD,
18439 IX86_BUILTIN_CMPGEPD,
18440 IX86_BUILTIN_CMPNEQPD,
18441 IX86_BUILTIN_CMPNLTPD,
18442 IX86_BUILTIN_CMPNLEPD,
18443 IX86_BUILTIN_CMPNGTPD,
18444 IX86_BUILTIN_CMPNGEPD,
18445 IX86_BUILTIN_CMPORDPD,
18446 IX86_BUILTIN_CMPUNORDPD,
18447 IX86_BUILTIN_CMPEQSD,
18448 IX86_BUILTIN_CMPLTSD,
18449 IX86_BUILTIN_CMPLESD,
18450 IX86_BUILTIN_CMPNEQSD,
18451 IX86_BUILTIN_CMPNLTSD,
18452 IX86_BUILTIN_CMPNLESD,
18453 IX86_BUILTIN_CMPORDSD,
18454 IX86_BUILTIN_CMPUNORDSD,
18456 IX86_BUILTIN_COMIEQSD,
18457 IX86_BUILTIN_COMILTSD,
18458 IX86_BUILTIN_COMILESD,
18459 IX86_BUILTIN_COMIGTSD,
18460 IX86_BUILTIN_COMIGESD,
18461 IX86_BUILTIN_COMINEQSD,
18462 IX86_BUILTIN_UCOMIEQSD,
18463 IX86_BUILTIN_UCOMILTSD,
18464 IX86_BUILTIN_UCOMILESD,
18465 IX86_BUILTIN_UCOMIGTSD,
18466 IX86_BUILTIN_UCOMIGESD,
18467 IX86_BUILTIN_UCOMINEQSD,
18469 IX86_BUILTIN_MAXPD,
18470 IX86_BUILTIN_MAXSD,
18471 IX86_BUILTIN_MINPD,
18472 IX86_BUILTIN_MINSD,
18474 IX86_BUILTIN_ANDPD,
18475 IX86_BUILTIN_ANDNPD,
18477 IX86_BUILTIN_XORPD,
18479 IX86_BUILTIN_SQRTPD,
18480 IX86_BUILTIN_SQRTSD,
18482 IX86_BUILTIN_UNPCKHPD,
18483 IX86_BUILTIN_UNPCKLPD,
18485 IX86_BUILTIN_SHUFPD,
18487 IX86_BUILTIN_LOADUPD,
18488 IX86_BUILTIN_STOREUPD,
18489 IX86_BUILTIN_MOVSD,
18491 IX86_BUILTIN_LOADHPD,
18492 IX86_BUILTIN_LOADLPD,
18494 IX86_BUILTIN_CVTDQ2PD,
18495 IX86_BUILTIN_CVTDQ2PS,
18497 IX86_BUILTIN_CVTPD2DQ,
18498 IX86_BUILTIN_CVTPD2PI,
18499 IX86_BUILTIN_CVTPD2PS,
18500 IX86_BUILTIN_CVTTPD2DQ,
18501 IX86_BUILTIN_CVTTPD2PI,
18503 IX86_BUILTIN_CVTPI2PD,
18504 IX86_BUILTIN_CVTSI2SD,
18505 IX86_BUILTIN_CVTSI642SD,
18507 IX86_BUILTIN_CVTSD2SI,
18508 IX86_BUILTIN_CVTSD2SI64,
18509 IX86_BUILTIN_CVTSD2SS,
18510 IX86_BUILTIN_CVTSS2SD,
18511 IX86_BUILTIN_CVTTSD2SI,
18512 IX86_BUILTIN_CVTTSD2SI64,
18514 IX86_BUILTIN_CVTPS2DQ,
18515 IX86_BUILTIN_CVTPS2PD,
18516 IX86_BUILTIN_CVTTPS2DQ,
18518 IX86_BUILTIN_MOVNTI,
18519 IX86_BUILTIN_MOVNTPD,
18520 IX86_BUILTIN_MOVNTDQ,
18523 IX86_BUILTIN_MASKMOVDQU,
18524 IX86_BUILTIN_MOVMSKPD,
18525 IX86_BUILTIN_PMOVMSKB128,
18527 IX86_BUILTIN_PACKSSWB128,
18528 IX86_BUILTIN_PACKSSDW128,
18529 IX86_BUILTIN_PACKUSWB128,
18531 IX86_BUILTIN_PADDB128,
18532 IX86_BUILTIN_PADDW128,
18533 IX86_BUILTIN_PADDD128,
18534 IX86_BUILTIN_PADDQ128,
18535 IX86_BUILTIN_PADDSB128,
18536 IX86_BUILTIN_PADDSW128,
18537 IX86_BUILTIN_PADDUSB128,
18538 IX86_BUILTIN_PADDUSW128,
18539 IX86_BUILTIN_PSUBB128,
18540 IX86_BUILTIN_PSUBW128,
18541 IX86_BUILTIN_PSUBD128,
18542 IX86_BUILTIN_PSUBQ128,
18543 IX86_BUILTIN_PSUBSB128,
18544 IX86_BUILTIN_PSUBSW128,
18545 IX86_BUILTIN_PSUBUSB128,
18546 IX86_BUILTIN_PSUBUSW128,
18548 IX86_BUILTIN_PAND128,
18549 IX86_BUILTIN_PANDN128,
18550 IX86_BUILTIN_POR128,
18551 IX86_BUILTIN_PXOR128,
18553 IX86_BUILTIN_PAVGB128,
18554 IX86_BUILTIN_PAVGW128,
18556 IX86_BUILTIN_PCMPEQB128,
18557 IX86_BUILTIN_PCMPEQW128,
18558 IX86_BUILTIN_PCMPEQD128,
18559 IX86_BUILTIN_PCMPGTB128,
18560 IX86_BUILTIN_PCMPGTW128,
18561 IX86_BUILTIN_PCMPGTD128,
18563 IX86_BUILTIN_PMADDWD128,
18565 IX86_BUILTIN_PMAXSW128,
18566 IX86_BUILTIN_PMAXUB128,
18567 IX86_BUILTIN_PMINSW128,
18568 IX86_BUILTIN_PMINUB128,
18570 IX86_BUILTIN_PMULUDQ,
18571 IX86_BUILTIN_PMULUDQ128,
18572 IX86_BUILTIN_PMULHUW128,
18573 IX86_BUILTIN_PMULHW128,
18574 IX86_BUILTIN_PMULLW128,
18576 IX86_BUILTIN_PSADBW128,
18577 IX86_BUILTIN_PSHUFHW,
18578 IX86_BUILTIN_PSHUFLW,
18579 IX86_BUILTIN_PSHUFD,
18581 IX86_BUILTIN_PSLLDQI128,
18582 IX86_BUILTIN_PSLLWI128,
18583 IX86_BUILTIN_PSLLDI128,
18584 IX86_BUILTIN_PSLLQI128,
18585 IX86_BUILTIN_PSRAWI128,
18586 IX86_BUILTIN_PSRADI128,
18587 IX86_BUILTIN_PSRLDQI128,
18588 IX86_BUILTIN_PSRLWI128,
18589 IX86_BUILTIN_PSRLDI128,
18590 IX86_BUILTIN_PSRLQI128,
18592 IX86_BUILTIN_PSLLDQ128,
18593 IX86_BUILTIN_PSLLW128,
18594 IX86_BUILTIN_PSLLD128,
18595 IX86_BUILTIN_PSLLQ128,
18596 IX86_BUILTIN_PSRAW128,
18597 IX86_BUILTIN_PSRAD128,
18598 IX86_BUILTIN_PSRLW128,
18599 IX86_BUILTIN_PSRLD128,
18600 IX86_BUILTIN_PSRLQ128,
18602 IX86_BUILTIN_PUNPCKHBW128,
18603 IX86_BUILTIN_PUNPCKHWD128,
18604 IX86_BUILTIN_PUNPCKHDQ128,
18605 IX86_BUILTIN_PUNPCKHQDQ128,
18606 IX86_BUILTIN_PUNPCKLBW128,
18607 IX86_BUILTIN_PUNPCKLWD128,
18608 IX86_BUILTIN_PUNPCKLDQ128,
18609 IX86_BUILTIN_PUNPCKLQDQ128,
18611 IX86_BUILTIN_CLFLUSH,
18612 IX86_BUILTIN_MFENCE,
18613 IX86_BUILTIN_LFENCE,
18616 IX86_BUILTIN_ADDSUBPS,
18617 IX86_BUILTIN_HADDPS,
18618 IX86_BUILTIN_HSUBPS,
18619 IX86_BUILTIN_MOVSHDUP,
18620 IX86_BUILTIN_MOVSLDUP,
18621 IX86_BUILTIN_ADDSUBPD,
18622 IX86_BUILTIN_HADDPD,
18623 IX86_BUILTIN_HSUBPD,
18624 IX86_BUILTIN_LDDQU,
18626 IX86_BUILTIN_MONITOR,
18627 IX86_BUILTIN_MWAIT,
18630 IX86_BUILTIN_PHADDW,
18631 IX86_BUILTIN_PHADDD,
18632 IX86_BUILTIN_PHADDSW,
18633 IX86_BUILTIN_PHSUBW,
18634 IX86_BUILTIN_PHSUBD,
18635 IX86_BUILTIN_PHSUBSW,
18636 IX86_BUILTIN_PMADDUBSW,
18637 IX86_BUILTIN_PMULHRSW,
18638 IX86_BUILTIN_PSHUFB,
18639 IX86_BUILTIN_PSIGNB,
18640 IX86_BUILTIN_PSIGNW,
18641 IX86_BUILTIN_PSIGND,
18642 IX86_BUILTIN_PALIGNR,
18643 IX86_BUILTIN_PABSB,
18644 IX86_BUILTIN_PABSW,
18645 IX86_BUILTIN_PABSD,
18647 IX86_BUILTIN_PHADDW128,
18648 IX86_BUILTIN_PHADDD128,
18649 IX86_BUILTIN_PHADDSW128,
18650 IX86_BUILTIN_PHSUBW128,
18651 IX86_BUILTIN_PHSUBD128,
18652 IX86_BUILTIN_PHSUBSW128,
18653 IX86_BUILTIN_PMADDUBSW128,
18654 IX86_BUILTIN_PMULHRSW128,
18655 IX86_BUILTIN_PSHUFB128,
18656 IX86_BUILTIN_PSIGNB128,
18657 IX86_BUILTIN_PSIGNW128,
18658 IX86_BUILTIN_PSIGND128,
18659 IX86_BUILTIN_PALIGNR128,
18660 IX86_BUILTIN_PABSB128,
18661 IX86_BUILTIN_PABSW128,
18662 IX86_BUILTIN_PABSD128,
18664 /* AMDFAM10 - SSE4A New Instructions. */
18665 IX86_BUILTIN_MOVNTSD,
18666 IX86_BUILTIN_MOVNTSS,
18667 IX86_BUILTIN_EXTRQI,
18668 IX86_BUILTIN_EXTRQ,
18669 IX86_BUILTIN_INSERTQI,
18670 IX86_BUILTIN_INSERTQ,
18673 IX86_BUILTIN_BLENDPD,
18674 IX86_BUILTIN_BLENDPS,
18675 IX86_BUILTIN_BLENDVPD,
18676 IX86_BUILTIN_BLENDVPS,
18677 IX86_BUILTIN_PBLENDVB128,
18678 IX86_BUILTIN_PBLENDW128,
18683 IX86_BUILTIN_INSERTPS128,
18685 IX86_BUILTIN_MOVNTDQA,
18686 IX86_BUILTIN_MPSADBW128,
18687 IX86_BUILTIN_PACKUSDW128,
18688 IX86_BUILTIN_PCMPEQQ,
18689 IX86_BUILTIN_PHMINPOSUW128,
18691 IX86_BUILTIN_PMAXSB128,
18692 IX86_BUILTIN_PMAXSD128,
18693 IX86_BUILTIN_PMAXUD128,
18694 IX86_BUILTIN_PMAXUW128,
18696 IX86_BUILTIN_PMINSB128,
18697 IX86_BUILTIN_PMINSD128,
18698 IX86_BUILTIN_PMINUD128,
18699 IX86_BUILTIN_PMINUW128,
18701 IX86_BUILTIN_PMOVSXBW128,
18702 IX86_BUILTIN_PMOVSXBD128,
18703 IX86_BUILTIN_PMOVSXBQ128,
18704 IX86_BUILTIN_PMOVSXWD128,
18705 IX86_BUILTIN_PMOVSXWQ128,
18706 IX86_BUILTIN_PMOVSXDQ128,
18708 IX86_BUILTIN_PMOVZXBW128,
18709 IX86_BUILTIN_PMOVZXBD128,
18710 IX86_BUILTIN_PMOVZXBQ128,
18711 IX86_BUILTIN_PMOVZXWD128,
18712 IX86_BUILTIN_PMOVZXWQ128,
18713 IX86_BUILTIN_PMOVZXDQ128,
18715 IX86_BUILTIN_PMULDQ128,
18716 IX86_BUILTIN_PMULLD128,
18718 IX86_BUILTIN_ROUNDPD,
18719 IX86_BUILTIN_ROUNDPS,
18720 IX86_BUILTIN_ROUNDSD,
18721 IX86_BUILTIN_ROUNDSS,
18723 IX86_BUILTIN_PTESTZ,
18724 IX86_BUILTIN_PTESTC,
18725 IX86_BUILTIN_PTESTNZC,
18727 IX86_BUILTIN_VEC_INIT_V2SI,
18728 IX86_BUILTIN_VEC_INIT_V4HI,
18729 IX86_BUILTIN_VEC_INIT_V8QI,
18730 IX86_BUILTIN_VEC_EXT_V2DF,
18731 IX86_BUILTIN_VEC_EXT_V2DI,
18732 IX86_BUILTIN_VEC_EXT_V4SF,
18733 IX86_BUILTIN_VEC_EXT_V4SI,
18734 IX86_BUILTIN_VEC_EXT_V8HI,
18735 IX86_BUILTIN_VEC_EXT_V2SI,
18736 IX86_BUILTIN_VEC_EXT_V4HI,
18737 IX86_BUILTIN_VEC_EXT_V16QI,
18738 IX86_BUILTIN_VEC_SET_V2DI,
18739 IX86_BUILTIN_VEC_SET_V4SF,
18740 IX86_BUILTIN_VEC_SET_V4SI,
18741 IX86_BUILTIN_VEC_SET_V8HI,
18742 IX86_BUILTIN_VEC_SET_V4HI,
18743 IX86_BUILTIN_VEC_SET_V16QI,
18745 IX86_BUILTIN_VEC_PACK_SFIX,
18748 IX86_BUILTIN_CRC32QI,
18749 IX86_BUILTIN_CRC32HI,
18750 IX86_BUILTIN_CRC32SI,
18751 IX86_BUILTIN_CRC32DI,
18753 IX86_BUILTIN_PCMPESTRI128,
18754 IX86_BUILTIN_PCMPESTRM128,
18755 IX86_BUILTIN_PCMPESTRA128,
18756 IX86_BUILTIN_PCMPESTRC128,
18757 IX86_BUILTIN_PCMPESTRO128,
18758 IX86_BUILTIN_PCMPESTRS128,
18759 IX86_BUILTIN_PCMPESTRZ128,
18760 IX86_BUILTIN_PCMPISTRI128,
18761 IX86_BUILTIN_PCMPISTRM128,
18762 IX86_BUILTIN_PCMPISTRA128,
18763 IX86_BUILTIN_PCMPISTRC128,
18764 IX86_BUILTIN_PCMPISTRO128,
18765 IX86_BUILTIN_PCMPISTRS128,
18766 IX86_BUILTIN_PCMPISTRZ128,
18768 IX86_BUILTIN_PCMPGTQ,
18770 /* AES instructions */
18771 IX86_BUILTIN_AESENC128,
18772 IX86_BUILTIN_AESENCLAST128,
18773 IX86_BUILTIN_AESDEC128,
18774 IX86_BUILTIN_AESDECLAST128,
18775 IX86_BUILTIN_AESIMC128,
18776 IX86_BUILTIN_AESKEYGENASSIST128,
18778 /* PCLMUL instruction */
18779 IX86_BUILTIN_PCLMULQDQ128,
18781 /* TFmode support builtins. */
18783 IX86_BUILTIN_FABSQ,
18784 IX86_BUILTIN_COPYSIGNQ,
18786 /* SSE5 instructions */
18787 IX86_BUILTIN_FMADDSS,
18788 IX86_BUILTIN_FMADDSD,
18789 IX86_BUILTIN_FMADDPS,
18790 IX86_BUILTIN_FMADDPD,
18791 IX86_BUILTIN_FMSUBSS,
18792 IX86_BUILTIN_FMSUBSD,
18793 IX86_BUILTIN_FMSUBPS,
18794 IX86_BUILTIN_FMSUBPD,
18795 IX86_BUILTIN_FNMADDSS,
18796 IX86_BUILTIN_FNMADDSD,
18797 IX86_BUILTIN_FNMADDPS,
18798 IX86_BUILTIN_FNMADDPD,
18799 IX86_BUILTIN_FNMSUBSS,
18800 IX86_BUILTIN_FNMSUBSD,
18801 IX86_BUILTIN_FNMSUBPS,
18802 IX86_BUILTIN_FNMSUBPD,
18803 IX86_BUILTIN_PCMOV_V2DI,
18804 IX86_BUILTIN_PCMOV_V4SI,
18805 IX86_BUILTIN_PCMOV_V8HI,
18806 IX86_BUILTIN_PCMOV_V16QI,
18807 IX86_BUILTIN_PCMOV_V4SF,
18808 IX86_BUILTIN_PCMOV_V2DF,
18809 IX86_BUILTIN_PPERM,
18810 IX86_BUILTIN_PERMPS,
18811 IX86_BUILTIN_PERMPD,
18812 IX86_BUILTIN_PMACSSWW,
18813 IX86_BUILTIN_PMACSWW,
18814 IX86_BUILTIN_PMACSSWD,
18815 IX86_BUILTIN_PMACSWD,
18816 IX86_BUILTIN_PMACSSDD,
18817 IX86_BUILTIN_PMACSDD,
18818 IX86_BUILTIN_PMACSSDQL,
18819 IX86_BUILTIN_PMACSSDQH,
18820 IX86_BUILTIN_PMACSDQL,
18821 IX86_BUILTIN_PMACSDQH,
18822 IX86_BUILTIN_PMADCSSWD,
18823 IX86_BUILTIN_PMADCSWD,
18824 IX86_BUILTIN_PHADDBW,
18825 IX86_BUILTIN_PHADDBD,
18826 IX86_BUILTIN_PHADDBQ,
18827 IX86_BUILTIN_PHADDWD,
18828 IX86_BUILTIN_PHADDWQ,
18829 IX86_BUILTIN_PHADDDQ,
18830 IX86_BUILTIN_PHADDUBW,
18831 IX86_BUILTIN_PHADDUBD,
18832 IX86_BUILTIN_PHADDUBQ,
18833 IX86_BUILTIN_PHADDUWD,
18834 IX86_BUILTIN_PHADDUWQ,
18835 IX86_BUILTIN_PHADDUDQ,
18836 IX86_BUILTIN_PHSUBBW,
18837 IX86_BUILTIN_PHSUBWD,
18838 IX86_BUILTIN_PHSUBDQ,
18839 IX86_BUILTIN_PROTB,
18840 IX86_BUILTIN_PROTW,
18841 IX86_BUILTIN_PROTD,
18842 IX86_BUILTIN_PROTQ,
18843 IX86_BUILTIN_PROTB_IMM,
18844 IX86_BUILTIN_PROTW_IMM,
18845 IX86_BUILTIN_PROTD_IMM,
18846 IX86_BUILTIN_PROTQ_IMM,
18847 IX86_BUILTIN_PSHLB,
18848 IX86_BUILTIN_PSHLW,
18849 IX86_BUILTIN_PSHLD,
18850 IX86_BUILTIN_PSHLQ,
18851 IX86_BUILTIN_PSHAB,
18852 IX86_BUILTIN_PSHAW,
18853 IX86_BUILTIN_PSHAD,
18854 IX86_BUILTIN_PSHAQ,
18855 IX86_BUILTIN_FRCZSS,
18856 IX86_BUILTIN_FRCZSD,
18857 IX86_BUILTIN_FRCZPS,
18858 IX86_BUILTIN_FRCZPD,
18859 IX86_BUILTIN_CVTPH2PS,
18860 IX86_BUILTIN_CVTPS2PH,
18862 IX86_BUILTIN_COMEQSS,
18863 IX86_BUILTIN_COMNESS,
18864 IX86_BUILTIN_COMLTSS,
18865 IX86_BUILTIN_COMLESS,
18866 IX86_BUILTIN_COMGTSS,
18867 IX86_BUILTIN_COMGESS,
18868 IX86_BUILTIN_COMUEQSS,
18869 IX86_BUILTIN_COMUNESS,
18870 IX86_BUILTIN_COMULTSS,
18871 IX86_BUILTIN_COMULESS,
18872 IX86_BUILTIN_COMUGTSS,
18873 IX86_BUILTIN_COMUGESS,
18874 IX86_BUILTIN_COMORDSS,
18875 IX86_BUILTIN_COMUNORDSS,
18876 IX86_BUILTIN_COMFALSESS,
18877 IX86_BUILTIN_COMTRUESS,
18879 IX86_BUILTIN_COMEQSD,
18880 IX86_BUILTIN_COMNESD,
18881 IX86_BUILTIN_COMLTSD,
18882 IX86_BUILTIN_COMLESD,
18883 IX86_BUILTIN_COMGTSD,
18884 IX86_BUILTIN_COMGESD,
18885 IX86_BUILTIN_COMUEQSD,
18886 IX86_BUILTIN_COMUNESD,
18887 IX86_BUILTIN_COMULTSD,
18888 IX86_BUILTIN_COMULESD,
18889 IX86_BUILTIN_COMUGTSD,
18890 IX86_BUILTIN_COMUGESD,
18891 IX86_BUILTIN_COMORDSD,
18892 IX86_BUILTIN_COMUNORDSD,
18893 IX86_BUILTIN_COMFALSESD,
18894 IX86_BUILTIN_COMTRUESD,
18896 IX86_BUILTIN_COMEQPS,
18897 IX86_BUILTIN_COMNEPS,
18898 IX86_BUILTIN_COMLTPS,
18899 IX86_BUILTIN_COMLEPS,
18900 IX86_BUILTIN_COMGTPS,
18901 IX86_BUILTIN_COMGEPS,
18902 IX86_BUILTIN_COMUEQPS,
18903 IX86_BUILTIN_COMUNEPS,
18904 IX86_BUILTIN_COMULTPS,
18905 IX86_BUILTIN_COMULEPS,
18906 IX86_BUILTIN_COMUGTPS,
18907 IX86_BUILTIN_COMUGEPS,
18908 IX86_BUILTIN_COMORDPS,
18909 IX86_BUILTIN_COMUNORDPS,
18910 IX86_BUILTIN_COMFALSEPS,
18911 IX86_BUILTIN_COMTRUEPS,
18913 IX86_BUILTIN_COMEQPD,
18914 IX86_BUILTIN_COMNEPD,
18915 IX86_BUILTIN_COMLTPD,
18916 IX86_BUILTIN_COMLEPD,
18917 IX86_BUILTIN_COMGTPD,
18918 IX86_BUILTIN_COMGEPD,
18919 IX86_BUILTIN_COMUEQPD,
18920 IX86_BUILTIN_COMUNEPD,
18921 IX86_BUILTIN_COMULTPD,
18922 IX86_BUILTIN_COMULEPD,
18923 IX86_BUILTIN_COMUGTPD,
18924 IX86_BUILTIN_COMUGEPD,
18925 IX86_BUILTIN_COMORDPD,
18926 IX86_BUILTIN_COMUNORDPD,
18927 IX86_BUILTIN_COMFALSEPD,
18928 IX86_BUILTIN_COMTRUEPD,
18930 IX86_BUILTIN_PCOMEQUB,
18931 IX86_BUILTIN_PCOMNEUB,
18932 IX86_BUILTIN_PCOMLTUB,
18933 IX86_BUILTIN_PCOMLEUB,
18934 IX86_BUILTIN_PCOMGTUB,
18935 IX86_BUILTIN_PCOMGEUB,
18936 IX86_BUILTIN_PCOMFALSEUB,
18937 IX86_BUILTIN_PCOMTRUEUB,
18938 IX86_BUILTIN_PCOMEQUW,
18939 IX86_BUILTIN_PCOMNEUW,
18940 IX86_BUILTIN_PCOMLTUW,
18941 IX86_BUILTIN_PCOMLEUW,
18942 IX86_BUILTIN_PCOMGTUW,
18943 IX86_BUILTIN_PCOMGEUW,
18944 IX86_BUILTIN_PCOMFALSEUW,
18945 IX86_BUILTIN_PCOMTRUEUW,
18946 IX86_BUILTIN_PCOMEQUD,
18947 IX86_BUILTIN_PCOMNEUD,
18948 IX86_BUILTIN_PCOMLTUD,
18949 IX86_BUILTIN_PCOMLEUD,
18950 IX86_BUILTIN_PCOMGTUD,
18951 IX86_BUILTIN_PCOMGEUD,
18952 IX86_BUILTIN_PCOMFALSEUD,
18953 IX86_BUILTIN_PCOMTRUEUD,
18954 IX86_BUILTIN_PCOMEQUQ,
18955 IX86_BUILTIN_PCOMNEUQ,
18956 IX86_BUILTIN_PCOMLTUQ,
18957 IX86_BUILTIN_PCOMLEUQ,
18958 IX86_BUILTIN_PCOMGTUQ,
18959 IX86_BUILTIN_PCOMGEUQ,
18960 IX86_BUILTIN_PCOMFALSEUQ,
18961 IX86_BUILTIN_PCOMTRUEUQ,
18963 IX86_BUILTIN_PCOMEQB,
18964 IX86_BUILTIN_PCOMNEB,
18965 IX86_BUILTIN_PCOMLTB,
18966 IX86_BUILTIN_PCOMLEB,
18967 IX86_BUILTIN_PCOMGTB,
18968 IX86_BUILTIN_PCOMGEB,
18969 IX86_BUILTIN_PCOMFALSEB,
18970 IX86_BUILTIN_PCOMTRUEB,
18971 IX86_BUILTIN_PCOMEQW,
18972 IX86_BUILTIN_PCOMNEW,
18973 IX86_BUILTIN_PCOMLTW,
18974 IX86_BUILTIN_PCOMLEW,
18975 IX86_BUILTIN_PCOMGTW,
18976 IX86_BUILTIN_PCOMGEW,
18977 IX86_BUILTIN_PCOMFALSEW,
18978 IX86_BUILTIN_PCOMTRUEW,
18979 IX86_BUILTIN_PCOMEQD,
18980 IX86_BUILTIN_PCOMNED,
18981 IX86_BUILTIN_PCOMLTD,
18982 IX86_BUILTIN_PCOMLED,
18983 IX86_BUILTIN_PCOMGTD,
18984 IX86_BUILTIN_PCOMGED,
18985 IX86_BUILTIN_PCOMFALSED,
18986 IX86_BUILTIN_PCOMTRUED,
18987 IX86_BUILTIN_PCOMEQQ,
18988 IX86_BUILTIN_PCOMNEQ,
18989 IX86_BUILTIN_PCOMLTQ,
18990 IX86_BUILTIN_PCOMLEQ,
18991 IX86_BUILTIN_PCOMGTQ,
18992 IX86_BUILTIN_PCOMGEQ,
18993 IX86_BUILTIN_PCOMFALSEQ,
18994 IX86_BUILTIN_PCOMTRUEQ,
18999 /* Table for the ix86 builtin decls. */
19000 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
19002 /* Table to record which ISA options the builtin needs. */
19003 static int ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
19005 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
19006 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
19007 * function decl in the ix86_builtins array. Returns the function decl or
19008 * NULL_TREE, if the builtin was not added.
19010 * Record all builtins, even if it isn't an instruction set in the current ISA
19011 * in case the user uses function specific options for a different ISA. When
19012 * the builtin is expanded, check at that time whether it is valid. */
19015 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
19017 tree decl = NULL_TREE;
19019 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
19021 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
19023 ix86_builtins[(int) code] = decl;
19024 ix86_builtins_isa[(int) code] = mask;
19030 /* Like def_builtin, but also marks the function decl "const". */
19033 def_builtin_const (int mask, const char *name, tree type,
19034 enum ix86_builtins code)
19036 tree decl = def_builtin (mask, name, type, code);
19038 TREE_READONLY (decl) = 1;
19042 /* Bits for builtin_description.flag. */
19044 /* Set when we don't support the comparison natively, and should
19045 swap_comparison in order to support it. */
19046 #define BUILTIN_DESC_SWAP_OPERANDS 1
19048 struct builtin_description
19050 const unsigned int mask;
19051 const enum insn_code icode;
19052 const char *const name;
19053 const enum ix86_builtins code;
19054 const enum rtx_code comparison;
19058 static const struct builtin_description bdesc_comi[] =
19060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
19061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
19062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
19063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
19064 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
19065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
19066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
19067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
19068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
19069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
19070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
19071 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
19072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
19073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
19074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
19075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
19076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
19077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
19078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
19079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
19080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
19081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
19082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
19083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
19086 static const struct builtin_description bdesc_pcmpestr[] =
19089 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
19090 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
19091 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
19092 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
19093 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
19094 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
19095 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
19098 static const struct builtin_description bdesc_pcmpistr[] =
19101 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
19102 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
19103 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
19104 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
19105 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
19106 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
19107 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
19110 /* Special builtin types */
19111 enum ix86_special_builtin_type
19113 SPECIAL_FTYPE_UNKNOWN,
19115 V16QI_FTYPE_PCCHAR,
19116 V4SF_FTYPE_PCFLOAT,
19117 V2DF_FTYPE_PCDOUBLE,
19118 V4SF_FTYPE_V4SF_PCV2SF,
19119 V2DF_FTYPE_V2DF_PCDOUBLE,
19121 VOID_FTYPE_PV2SF_V4SF,
19122 VOID_FTYPE_PV2DI_V2DI,
19123 VOID_FTYPE_PCHAR_V16QI,
19124 VOID_FTYPE_PFLOAT_V4SF,
19125 VOID_FTYPE_PDOUBLE_V2DF,
19127 VOID_FTYPE_PINT_INT
19130 /* Builtin types */
19131 enum ix86_builtin_type
19134 FLOAT128_FTYPE_FLOAT128,
19136 FLOAT128_FTYPE_FLOAT128_FLOAT128,
19137 INT_FTYPE_V2DI_V2DI_PTEST,
19155 V4SF_FTYPE_V4SF_VEC_MERGE,
19163 V2DF_FTYPE_V2DF_VEC_MERGE,
19173 V16QI_FTYPE_V16QI_V16QI,
19174 V16QI_FTYPE_V8HI_V8HI,
19175 V8QI_FTYPE_V8QI_V8QI,
19176 V8QI_FTYPE_V4HI_V4HI,
19177 V8HI_FTYPE_V8HI_V8HI,
19178 V8HI_FTYPE_V8HI_V8HI_COUNT,
19179 V8HI_FTYPE_V16QI_V16QI,
19180 V8HI_FTYPE_V4SI_V4SI,
19181 V8HI_FTYPE_V8HI_SI_COUNT,
19182 V4SI_FTYPE_V4SI_V4SI,
19183 V4SI_FTYPE_V4SI_V4SI_COUNT,
19184 V4SI_FTYPE_V8HI_V8HI,
19185 V4SI_FTYPE_V4SF_V4SF,
19186 V4SI_FTYPE_V2DF_V2DF,
19187 V4SI_FTYPE_V4SI_SI_COUNT,
19188 V4HI_FTYPE_V4HI_V4HI,
19189 V4HI_FTYPE_V4HI_V4HI_COUNT,
19190 V4HI_FTYPE_V8QI_V8QI,
19191 V4HI_FTYPE_V2SI_V2SI,
19192 V4HI_FTYPE_V4HI_SI_COUNT,
19193 V4SF_FTYPE_V4SF_V4SF,
19194 V4SF_FTYPE_V4SF_V4SF_SWAP,
19195 V4SF_FTYPE_V4SF_V2SI,
19196 V4SF_FTYPE_V4SF_V2DF,
19197 V4SF_FTYPE_V4SF_DI,
19198 V4SF_FTYPE_V4SF_SI,
19199 V2DI_FTYPE_V2DI_V2DI,
19200 V2DI_FTYPE_V2DI_V2DI_COUNT,
19201 V2DI_FTYPE_V16QI_V16QI,
19202 V2DI_FTYPE_V4SI_V4SI,
19203 V2DI_FTYPE_V2DI_V16QI,
19204 V2DI_FTYPE_V2DF_V2DF,
19205 V2DI_FTYPE_V2DI_SI_COUNT,
19206 V2SI_FTYPE_V2SI_V2SI,
19207 V2SI_FTYPE_V2SI_V2SI_COUNT,
19208 V2SI_FTYPE_V4HI_V4HI,
19209 V2SI_FTYPE_V2SF_V2SF,
19210 V2SI_FTYPE_V2SI_SI_COUNT,
19211 V2DF_FTYPE_V2DF_V2DF,
19212 V2DF_FTYPE_V2DF_V2DF_SWAP,
19213 V2DF_FTYPE_V2DF_V4SF,
19214 V2DF_FTYPE_V2DF_DI,
19215 V2DF_FTYPE_V2DF_SI,
19216 V2SF_FTYPE_V2SF_V2SF,
19217 V1DI_FTYPE_V1DI_V1DI,
19218 V1DI_FTYPE_V1DI_V1DI_COUNT,
19219 V1DI_FTYPE_V8QI_V8QI,
19220 V1DI_FTYPE_V2SI_V2SI,
19221 V1DI_FTYPE_V1DI_SI_COUNT,
19222 UINT64_FTYPE_UINT64_UINT64,
19223 UINT_FTYPE_UINT_UINT,
19224 UINT_FTYPE_UINT_USHORT,
19225 UINT_FTYPE_UINT_UCHAR,
19226 V8HI_FTYPE_V8HI_INT,
19227 V4SI_FTYPE_V4SI_INT,
19228 V4HI_FTYPE_V4HI_INT,
19229 V4SF_FTYPE_V4SF_INT,
19230 V2DI_FTYPE_V2DI_INT,
19231 V2DI2TI_FTYPE_V2DI_INT,
19232 V2DF_FTYPE_V2DF_INT,
19233 V16QI_FTYPE_V16QI_V16QI_V16QI,
19234 V4SF_FTYPE_V4SF_V4SF_V4SF,
19235 V2DF_FTYPE_V2DF_V2DF_V2DF,
19236 V16QI_FTYPE_V16QI_V16QI_INT,
19237 V8HI_FTYPE_V8HI_V8HI_INT,
19238 V4SI_FTYPE_V4SI_V4SI_INT,
19239 V4SF_FTYPE_V4SF_V4SF_INT,
19240 V2DI_FTYPE_V2DI_V2DI_INT,
19241 V2DI2TI_FTYPE_V2DI_V2DI_INT,
19242 V1DI2DI_FTYPE_V1DI_V1DI_INT,
19243 V2DF_FTYPE_V2DF_V2DF_INT,
19244 V2DI_FTYPE_V2DI_UINT_UINT,
19245 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
19248 /* Special builtins with variable number of arguments. */
19249 static const struct builtin_description bdesc_special_args[] =
19252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
19255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
19258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
19259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
19260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
19262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
19263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
19264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
19265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
19267 /* SSE or 3DNow!A */
19268 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
19269 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
19272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
19273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
19274 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
19275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
19276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
19277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
19278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
19279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
19280 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
19282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
19283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
19286 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
19289 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
19292 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
19293 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
19296 /* Builtins with variable number of arguments. */
19297 static const struct builtin_description bdesc_args[] =
19300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
19336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
19338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
19339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
19340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
19342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
19344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
19345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
19346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
19347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
19348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
19349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
19351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
19352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
19353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
19354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
19355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
19356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
19358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
19359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
19360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
19361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
19364 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
19365 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
19366 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
19367 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
19369 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19370 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19371 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19372 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
19373 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
19374 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
19375 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19376 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19377 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19378 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19379 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19380 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19381 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19382 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19383 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19386 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
19387 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
19388 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
19389 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
19390 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19391 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19394 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
19395 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19396 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19397 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19398 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
19401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
19402 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
19403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
19404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
19405 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
19407 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19409 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19410 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19411 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
19419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
19420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
19421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
19425 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
19426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
19427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
19429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
19431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
19432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
19433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
19435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
19436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
19437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19441 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19442 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19446 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19448 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19449 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19452 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
19458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
19459 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
19461 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
19463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
19464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
19465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
19467 /* SSE MMX or 3Dnow!A */
19468 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19469 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19470 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19472 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19473 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19474 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19475 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19477 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
19478 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
19480 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
19483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
19486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
19487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
19488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
19489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
19491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
19492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
19493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
19494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
19495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
19497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
19499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
19500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
19501 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
19502 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
19504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
19505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
19506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
19508 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19509 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19510 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19511 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
19518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
19519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
19520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
19522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
19524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
19525 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
19526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
19530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
19531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
19532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
19534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
19535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
19536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19539 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19543 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19545 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19546 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19552 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
19554 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19555 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19556 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19557 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19558 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19559 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19560 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19561 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19572 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19573 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
19575 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19577 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19578 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19590 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19591 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19592 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
19605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
19606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
19608 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
19611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
19612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
19614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
19616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
19617 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
19618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
19619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
19621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
19622 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
19623 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
19624 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
19625 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
19626 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
19627 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
19629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
19630 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
19631 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
19632 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
19633 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
19634 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
19635 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
19637 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
19638 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
19639 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
19640 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
19642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
19643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
19644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
19646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
19648 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
19649 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
19652 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
19653 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
19656 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
19657 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19659 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19660 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19661 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19662 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19663 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19664 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19667 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
19668 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
19669 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
19670 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
19671 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
19672 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
19674 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19675 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19676 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19677 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19678 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19679 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19680 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19681 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19682 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19683 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19684 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19685 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19686 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
19687 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
19688 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19689 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19690 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19691 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19692 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19693 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19694 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19695 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19696 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19697 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
19701 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
19704 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19705 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19706 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
19707 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
19708 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19709 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19710 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19711 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
19712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
19713 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
19715 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
19716 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
19717 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
19718 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
19719 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
19720 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
19721 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
19722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
19723 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
19724 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
19725 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
19726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
19727 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
19729 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
19730 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19731 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19732 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19733 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
19740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19742 /* SSE4.1 and SSE5 */
19743 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
19744 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
19745 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19746 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19748 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
19749 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
19750 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
19753 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19754 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
19755 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
19756 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
19757 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
19760 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
19761 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
19762 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
19763 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19766 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
19767 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
19769 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19770 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19771 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19772 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19775 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
19779 enum multi_arg_type {
19789 MULTI_ARG_3_PERMPS,
19790 MULTI_ARG_3_PERMPD,
19797 MULTI_ARG_2_DI_IMM,
19798 MULTI_ARG_2_SI_IMM,
19799 MULTI_ARG_2_HI_IMM,
19800 MULTI_ARG_2_QI_IMM,
19801 MULTI_ARG_2_SF_CMP,
19802 MULTI_ARG_2_DF_CMP,
19803 MULTI_ARG_2_DI_CMP,
19804 MULTI_ARG_2_SI_CMP,
19805 MULTI_ARG_2_HI_CMP,
19806 MULTI_ARG_2_QI_CMP,
19829 static const struct builtin_description bdesc_multi_arg[] =
19831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
19832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
19833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
19834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
19835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
19836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
19837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
19838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
19839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
19840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
19841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
19842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
19843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
19844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
19845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
19846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
19847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
19848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
19849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
19850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
19851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
19852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
19853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
19854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
19855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
19856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
19857 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
19858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
19859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
19860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
19861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
19862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
19863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
19864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
19865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
19866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
19867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
19868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
19869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
19870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
19871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
19872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
19873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
19874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
19875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
19876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
19877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
19878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
19879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
19880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
19881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
19882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
19883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
19884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
19885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
19886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
19887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
19888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
19889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
19890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
19891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
19892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
19893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
19894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
19895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
19896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
19897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
19898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
19899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
19900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
19901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
19902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
19903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
19904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
19905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
19907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
19908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
19909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
19910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
19911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
19912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
19913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
19914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
19915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
19918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
19919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
19920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
19921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
19922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
19924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
19925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
19926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
19927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
19928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
19929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
19930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
19931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
19932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
19935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
19936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
19937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
19938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
19939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
19942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
19943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
19944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
19945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
19946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
19947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
19948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
19949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
19952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
19953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
19954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
19955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
19956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
19958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
19959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
19962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
19963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
19964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
19965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
19966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
19969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
19970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
19971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
19972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
19973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
19976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
19979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
19980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
19981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
19983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
19984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
19987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
19988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
19989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
19991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
19992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
19995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
19996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
19997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
19999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
20000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
20001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
20002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
20003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
20004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
20005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
20007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
20008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
20009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
20010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
20011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
20012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
20013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
20015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
20016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
20017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
20018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
20019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
20020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
20021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
20023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
20024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
20025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
20026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
20027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
20028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
20029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
20031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
20032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
20033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
20034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
20035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
20036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
20037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
20039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
20040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
20041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
20042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
20043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
20044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
20045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
20046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
20048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
20049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
20050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
20051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
20052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
20053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
20054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
20055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
20057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
20058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
20059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
20060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
20061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
20062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
20063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
20064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
20067 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
20068 in the current target ISA to allow the user to compile particular modules
20069 with different target specific options that differ from the command line
20072 ix86_init_mmx_sse_builtins (void)
20074 const struct builtin_description * d;
20077 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
20078 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20079 tree V1DI_type_node
20080 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
20081 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
20082 tree V2DI_type_node
20083 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
20084 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
20085 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
20086 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
20087 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20088 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
20089 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
20091 tree pchar_type_node = build_pointer_type (char_type_node);
20092 tree pcchar_type_node
20093 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
20094 tree pfloat_type_node = build_pointer_type (float_type_node);
20095 tree pcfloat_type_node
20096 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
20097 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
20098 tree pcv2sf_type_node
20099 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
20100 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
20101 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
20104 tree int_ftype_v4sf_v4sf
20105 = build_function_type_list (integer_type_node,
20106 V4SF_type_node, V4SF_type_node, NULL_TREE);
20107 tree v4si_ftype_v4sf_v4sf
20108 = build_function_type_list (V4SI_type_node,
20109 V4SF_type_node, V4SF_type_node, NULL_TREE);
20110 /* MMX/SSE/integer conversions. */
20111 tree int_ftype_v4sf
20112 = build_function_type_list (integer_type_node,
20113 V4SF_type_node, NULL_TREE);
20114 tree int64_ftype_v4sf
20115 = build_function_type_list (long_long_integer_type_node,
20116 V4SF_type_node, NULL_TREE);
20117 tree int_ftype_v8qi
20118 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
20119 tree v4sf_ftype_v4sf_int
20120 = build_function_type_list (V4SF_type_node,
20121 V4SF_type_node, integer_type_node, NULL_TREE);
20122 tree v4sf_ftype_v4sf_int64
20123 = build_function_type_list (V4SF_type_node,
20124 V4SF_type_node, long_long_integer_type_node,
20126 tree v4sf_ftype_v4sf_v2si
20127 = build_function_type_list (V4SF_type_node,
20128 V4SF_type_node, V2SI_type_node, NULL_TREE);
20130 /* Miscellaneous. */
20131 tree v8qi_ftype_v4hi_v4hi
20132 = build_function_type_list (V8QI_type_node,
20133 V4HI_type_node, V4HI_type_node, NULL_TREE);
20134 tree v4hi_ftype_v2si_v2si
20135 = build_function_type_list (V4HI_type_node,
20136 V2SI_type_node, V2SI_type_node, NULL_TREE);
20137 tree v4sf_ftype_v4sf_v4sf_int
20138 = build_function_type_list (V4SF_type_node,
20139 V4SF_type_node, V4SF_type_node,
20140 integer_type_node, NULL_TREE);
20141 tree v2si_ftype_v4hi_v4hi
20142 = build_function_type_list (V2SI_type_node,
20143 V4HI_type_node, V4HI_type_node, NULL_TREE);
20144 tree v4hi_ftype_v4hi_int
20145 = build_function_type_list (V4HI_type_node,
20146 V4HI_type_node, integer_type_node, NULL_TREE);
20147 tree v2si_ftype_v2si_int
20148 = build_function_type_list (V2SI_type_node,
20149 V2SI_type_node, integer_type_node, NULL_TREE);
20150 tree v1di_ftype_v1di_int
20151 = build_function_type_list (V1DI_type_node,
20152 V1DI_type_node, integer_type_node, NULL_TREE);
20154 tree void_ftype_void
20155 = build_function_type (void_type_node, void_list_node);
20156 tree void_ftype_unsigned
20157 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
20158 tree void_ftype_unsigned_unsigned
20159 = build_function_type_list (void_type_node, unsigned_type_node,
20160 unsigned_type_node, NULL_TREE);
20161 tree void_ftype_pcvoid_unsigned_unsigned
20162 = build_function_type_list (void_type_node, const_ptr_type_node,
20163 unsigned_type_node, unsigned_type_node,
20165 tree unsigned_ftype_void
20166 = build_function_type (unsigned_type_node, void_list_node);
20167 tree v2si_ftype_v4sf
20168 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
20169 /* Loads/stores. */
20170 tree void_ftype_v8qi_v8qi_pchar
20171 = build_function_type_list (void_type_node,
20172 V8QI_type_node, V8QI_type_node,
20173 pchar_type_node, NULL_TREE);
20174 tree v4sf_ftype_pcfloat
20175 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
20176 tree v4sf_ftype_v4sf_pcv2sf
20177 = build_function_type_list (V4SF_type_node,
20178 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
20179 tree void_ftype_pv2sf_v4sf
20180 = build_function_type_list (void_type_node,
20181 pv2sf_type_node, V4SF_type_node, NULL_TREE);
20182 tree void_ftype_pfloat_v4sf
20183 = build_function_type_list (void_type_node,
20184 pfloat_type_node, V4SF_type_node, NULL_TREE);
20185 tree void_ftype_pdi_di
20186 = build_function_type_list (void_type_node,
20187 pdi_type_node, long_long_unsigned_type_node,
20189 tree void_ftype_pv2di_v2di
20190 = build_function_type_list (void_type_node,
20191 pv2di_type_node, V2DI_type_node, NULL_TREE);
20192 /* Normal vector unops. */
20193 tree v4sf_ftype_v4sf
20194 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
20195 tree v16qi_ftype_v16qi
20196 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
20197 tree v8hi_ftype_v8hi
20198 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
20199 tree v4si_ftype_v4si
20200 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
20201 tree v8qi_ftype_v8qi
20202 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
20203 tree v4hi_ftype_v4hi
20204 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
20206 /* Normal vector binops. */
20207 tree v4sf_ftype_v4sf_v4sf
20208 = build_function_type_list (V4SF_type_node,
20209 V4SF_type_node, V4SF_type_node, NULL_TREE);
20210 tree v8qi_ftype_v8qi_v8qi
20211 = build_function_type_list (V8QI_type_node,
20212 V8QI_type_node, V8QI_type_node, NULL_TREE);
20213 tree v4hi_ftype_v4hi_v4hi
20214 = build_function_type_list (V4HI_type_node,
20215 V4HI_type_node, V4HI_type_node, NULL_TREE);
20216 tree v2si_ftype_v2si_v2si
20217 = build_function_type_list (V2SI_type_node,
20218 V2SI_type_node, V2SI_type_node, NULL_TREE);
20219 tree v1di_ftype_v1di_v1di
20220 = build_function_type_list (V1DI_type_node,
20221 V1DI_type_node, V1DI_type_node, NULL_TREE);
20222 tree v1di_ftype_v1di_v1di_int
20223 = build_function_type_list (V1DI_type_node,
20224 V1DI_type_node, V1DI_type_node,
20225 integer_type_node, NULL_TREE);
20226 tree v2si_ftype_v2sf
20227 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
20228 tree v2sf_ftype_v2si
20229 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
20230 tree v2si_ftype_v2si
20231 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
20232 tree v2sf_ftype_v2sf
20233 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
20234 tree v2sf_ftype_v2sf_v2sf
20235 = build_function_type_list (V2SF_type_node,
20236 V2SF_type_node, V2SF_type_node, NULL_TREE);
20237 tree v2si_ftype_v2sf_v2sf
20238 = build_function_type_list (V2SI_type_node,
20239 V2SF_type_node, V2SF_type_node, NULL_TREE);
20240 tree pint_type_node = build_pointer_type (integer_type_node);
20241 tree pdouble_type_node = build_pointer_type (double_type_node);
20242 tree pcdouble_type_node = build_pointer_type (
20243 build_type_variant (double_type_node, 1, 0));
20244 tree int_ftype_v2df_v2df
20245 = build_function_type_list (integer_type_node,
20246 V2DF_type_node, V2DF_type_node, NULL_TREE);
20248 tree void_ftype_pcvoid
20249 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
20250 tree v4sf_ftype_v4si
20251 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
20252 tree v4si_ftype_v4sf
20253 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
20254 tree v2df_ftype_v4si
20255 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
20256 tree v4si_ftype_v2df
20257 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
20258 tree v4si_ftype_v2df_v2df
20259 = build_function_type_list (V4SI_type_node,
20260 V2DF_type_node, V2DF_type_node, NULL_TREE);
20261 tree v2si_ftype_v2df
20262 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
20263 tree v4sf_ftype_v2df
20264 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
20265 tree v2df_ftype_v2si
20266 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
20267 tree v2df_ftype_v4sf
20268 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
20269 tree int_ftype_v2df
20270 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
20271 tree int64_ftype_v2df
20272 = build_function_type_list (long_long_integer_type_node,
20273 V2DF_type_node, NULL_TREE);
20274 tree v2df_ftype_v2df_int
20275 = build_function_type_list (V2DF_type_node,
20276 V2DF_type_node, integer_type_node, NULL_TREE);
20277 tree v2df_ftype_v2df_int64
20278 = build_function_type_list (V2DF_type_node,
20279 V2DF_type_node, long_long_integer_type_node,
20281 tree v4sf_ftype_v4sf_v2df
20282 = build_function_type_list (V4SF_type_node,
20283 V4SF_type_node, V2DF_type_node, NULL_TREE);
20284 tree v2df_ftype_v2df_v4sf
20285 = build_function_type_list (V2DF_type_node,
20286 V2DF_type_node, V4SF_type_node, NULL_TREE);
20287 tree v2df_ftype_v2df_v2df_int
20288 = build_function_type_list (V2DF_type_node,
20289 V2DF_type_node, V2DF_type_node,
20292 tree v2df_ftype_v2df_pcdouble
20293 = build_function_type_list (V2DF_type_node,
20294 V2DF_type_node, pcdouble_type_node, NULL_TREE);
20295 tree void_ftype_pdouble_v2df
20296 = build_function_type_list (void_type_node,
20297 pdouble_type_node, V2DF_type_node, NULL_TREE);
20298 tree void_ftype_pint_int
20299 = build_function_type_list (void_type_node,
20300 pint_type_node, integer_type_node, NULL_TREE);
20301 tree void_ftype_v16qi_v16qi_pchar
20302 = build_function_type_list (void_type_node,
20303 V16QI_type_node, V16QI_type_node,
20304 pchar_type_node, NULL_TREE);
20305 tree v2df_ftype_pcdouble
20306 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
20307 tree v2df_ftype_v2df_v2df
20308 = build_function_type_list (V2DF_type_node,
20309 V2DF_type_node, V2DF_type_node, NULL_TREE);
20310 tree v16qi_ftype_v16qi_v16qi
20311 = build_function_type_list (V16QI_type_node,
20312 V16QI_type_node, V16QI_type_node, NULL_TREE);
20313 tree v8hi_ftype_v8hi_v8hi
20314 = build_function_type_list (V8HI_type_node,
20315 V8HI_type_node, V8HI_type_node, NULL_TREE);
20316 tree v4si_ftype_v4si_v4si
20317 = build_function_type_list (V4SI_type_node,
20318 V4SI_type_node, V4SI_type_node, NULL_TREE);
20319 tree v2di_ftype_v2di_v2di
20320 = build_function_type_list (V2DI_type_node,
20321 V2DI_type_node, V2DI_type_node, NULL_TREE);
20322 tree v2di_ftype_v2df_v2df
20323 = build_function_type_list (V2DI_type_node,
20324 V2DF_type_node, V2DF_type_node, NULL_TREE);
20325 tree v2df_ftype_v2df
20326 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
20327 tree v2di_ftype_v2di_int
20328 = build_function_type_list (V2DI_type_node,
20329 V2DI_type_node, integer_type_node, NULL_TREE);
20330 tree v2di_ftype_v2di_v2di_int
20331 = build_function_type_list (V2DI_type_node, V2DI_type_node,
20332 V2DI_type_node, integer_type_node, NULL_TREE);
20333 tree v4si_ftype_v4si_int
20334 = build_function_type_list (V4SI_type_node,
20335 V4SI_type_node, integer_type_node, NULL_TREE);
20336 tree v8hi_ftype_v8hi_int
20337 = build_function_type_list (V8HI_type_node,
20338 V8HI_type_node, integer_type_node, NULL_TREE);
20339 tree v4si_ftype_v8hi_v8hi
20340 = build_function_type_list (V4SI_type_node,
20341 V8HI_type_node, V8HI_type_node, NULL_TREE);
20342 tree v1di_ftype_v8qi_v8qi
20343 = build_function_type_list (V1DI_type_node,
20344 V8QI_type_node, V8QI_type_node, NULL_TREE);
20345 tree v1di_ftype_v2si_v2si
20346 = build_function_type_list (V1DI_type_node,
20347 V2SI_type_node, V2SI_type_node, NULL_TREE);
20348 tree v2di_ftype_v16qi_v16qi
20349 = build_function_type_list (V2DI_type_node,
20350 V16QI_type_node, V16QI_type_node, NULL_TREE);
20351 tree v2di_ftype_v4si_v4si
20352 = build_function_type_list (V2DI_type_node,
20353 V4SI_type_node, V4SI_type_node, NULL_TREE);
20354 tree int_ftype_v16qi
20355 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
20356 tree v16qi_ftype_pcchar
20357 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
20358 tree void_ftype_pchar_v16qi
20359 = build_function_type_list (void_type_node,
20360 pchar_type_node, V16QI_type_node, NULL_TREE);
20362 tree v2di_ftype_v2di_unsigned_unsigned
20363 = build_function_type_list (V2DI_type_node, V2DI_type_node,
20364 unsigned_type_node, unsigned_type_node,
20366 tree v2di_ftype_v2di_v2di_unsigned_unsigned
20367 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
20368 unsigned_type_node, unsigned_type_node,
20370 tree v2di_ftype_v2di_v16qi
20371 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
20373 tree v2df_ftype_v2df_v2df_v2df
20374 = build_function_type_list (V2DF_type_node,
20375 V2DF_type_node, V2DF_type_node,
20376 V2DF_type_node, NULL_TREE);
20377 tree v4sf_ftype_v4sf_v4sf_v4sf
20378 = build_function_type_list (V4SF_type_node,
20379 V4SF_type_node, V4SF_type_node,
20380 V4SF_type_node, NULL_TREE);
20381 tree v8hi_ftype_v16qi
20382 = build_function_type_list (V8HI_type_node, V16QI_type_node,
20384 tree v4si_ftype_v16qi
20385 = build_function_type_list (V4SI_type_node, V16QI_type_node,
20387 tree v2di_ftype_v16qi
20388 = build_function_type_list (V2DI_type_node, V16QI_type_node,
20390 tree v4si_ftype_v8hi
20391 = build_function_type_list (V4SI_type_node, V8HI_type_node,
20393 tree v2di_ftype_v8hi
20394 = build_function_type_list (V2DI_type_node, V8HI_type_node,
20396 tree v2di_ftype_v4si
20397 = build_function_type_list (V2DI_type_node, V4SI_type_node,
20399 tree v2di_ftype_pv2di
20400 = build_function_type_list (V2DI_type_node, pv2di_type_node,
20402 tree v16qi_ftype_v16qi_v16qi_int
20403 = build_function_type_list (V16QI_type_node, V16QI_type_node,
20404 V16QI_type_node, integer_type_node,
20406 tree v16qi_ftype_v16qi_v16qi_v16qi
20407 = build_function_type_list (V16QI_type_node, V16QI_type_node,
20408 V16QI_type_node, V16QI_type_node,
20410 tree v8hi_ftype_v8hi_v8hi_int
20411 = build_function_type_list (V8HI_type_node, V8HI_type_node,
20412 V8HI_type_node, integer_type_node,
20414 tree v4si_ftype_v4si_v4si_int
20415 = build_function_type_list (V4SI_type_node, V4SI_type_node,
20416 V4SI_type_node, integer_type_node,
20418 tree int_ftype_v2di_v2di
20419 = build_function_type_list (integer_type_node,
20420 V2DI_type_node, V2DI_type_node,
20422 tree int_ftype_v16qi_int_v16qi_int_int
20423 = build_function_type_list (integer_type_node,
20430 tree v16qi_ftype_v16qi_int_v16qi_int_int
20431 = build_function_type_list (V16QI_type_node,
20438 tree int_ftype_v16qi_v16qi_int
20439 = build_function_type_list (integer_type_node,
20445 /* SSE5 instructions */
20446 tree v2di_ftype_v2di_v2di_v2di
20447 = build_function_type_list (V2DI_type_node,
20453 tree v4si_ftype_v4si_v4si_v4si
20454 = build_function_type_list (V4SI_type_node,
20460 tree v4si_ftype_v4si_v4si_v2di
20461 = build_function_type_list (V4SI_type_node,
20467 tree v8hi_ftype_v8hi_v8hi_v8hi
20468 = build_function_type_list (V8HI_type_node,
20474 tree v8hi_ftype_v8hi_v8hi_v4si
20475 = build_function_type_list (V8HI_type_node,
20481 tree v2df_ftype_v2df_v2df_v16qi
20482 = build_function_type_list (V2DF_type_node,
20488 tree v4sf_ftype_v4sf_v4sf_v16qi
20489 = build_function_type_list (V4SF_type_node,
20495 tree v2di_ftype_v2di_si
20496 = build_function_type_list (V2DI_type_node,
20501 tree v4si_ftype_v4si_si
20502 = build_function_type_list (V4SI_type_node,
20507 tree v8hi_ftype_v8hi_si
20508 = build_function_type_list (V8HI_type_node,
20513 tree v16qi_ftype_v16qi_si
20514 = build_function_type_list (V16QI_type_node,
20518 tree v4sf_ftype_v4hi
20519 = build_function_type_list (V4SF_type_node,
20523 tree v4hi_ftype_v4sf
20524 = build_function_type_list (V4HI_type_node,
20528 tree v2di_ftype_v2di
20529 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
20531 tree v16qi_ftype_v8hi_v8hi
20532 = build_function_type_list (V16QI_type_node,
20533 V8HI_type_node, V8HI_type_node,
20535 tree v8hi_ftype_v4si_v4si
20536 = build_function_type_list (V8HI_type_node,
20537 V4SI_type_node, V4SI_type_node,
20539 tree v8hi_ftype_v16qi_v16qi
20540 = build_function_type_list (V8HI_type_node,
20541 V16QI_type_node, V16QI_type_node,
20543 tree v4hi_ftype_v8qi_v8qi
20544 = build_function_type_list (V4HI_type_node,
20545 V8QI_type_node, V8QI_type_node,
20547 tree unsigned_ftype_unsigned_uchar
20548 = build_function_type_list (unsigned_type_node,
20549 unsigned_type_node,
20550 unsigned_char_type_node,
20552 tree unsigned_ftype_unsigned_ushort
20553 = build_function_type_list (unsigned_type_node,
20554 unsigned_type_node,
20555 short_unsigned_type_node,
20557 tree unsigned_ftype_unsigned_unsigned
20558 = build_function_type_list (unsigned_type_node,
20559 unsigned_type_node,
20560 unsigned_type_node,
20562 tree uint64_ftype_uint64_uint64
20563 = build_function_type_list (long_long_unsigned_type_node,
20564 long_long_unsigned_type_node,
20565 long_long_unsigned_type_node,
20567 tree float_ftype_float
20568 = build_function_type_list (float_type_node,
20574 /* Add all special builtins with variable number of operands. */
20575 for (i = 0, d = bdesc_special_args;
20576 i < ARRAY_SIZE (bdesc_special_args);
20584 switch ((enum ix86_special_builtin_type) d->flag)
20586 case VOID_FTYPE_VOID:
20587 type = void_ftype_void;
20589 case V16QI_FTYPE_PCCHAR:
20590 type = v16qi_ftype_pcchar;
20592 case V4SF_FTYPE_PCFLOAT:
20593 type = v4sf_ftype_pcfloat;
20595 case V2DI_FTYPE_PV2DI:
20596 type = v2di_ftype_pv2di;
20598 case V2DF_FTYPE_PCDOUBLE:
20599 type = v2df_ftype_pcdouble;
20601 case V4SF_FTYPE_V4SF_PCV2SF:
20602 type = v4sf_ftype_v4sf_pcv2sf;
20604 case V2DF_FTYPE_V2DF_PCDOUBLE:
20605 type = v2df_ftype_v2df_pcdouble;
20607 case VOID_FTYPE_PV2SF_V4SF:
20608 type = void_ftype_pv2sf_v4sf;
20610 case VOID_FTYPE_PV2DI_V2DI:
20611 type = void_ftype_pv2di_v2di;
20613 case VOID_FTYPE_PCHAR_V16QI:
20614 type = void_ftype_pchar_v16qi;
20616 case VOID_FTYPE_PFLOAT_V4SF:
20617 type = void_ftype_pfloat_v4sf;
20619 case VOID_FTYPE_PDOUBLE_V2DF:
20620 type = void_ftype_pdouble_v2df;
20622 case VOID_FTYPE_PDI_DI:
20623 type = void_ftype_pdi_di;
20625 case VOID_FTYPE_PINT_INT:
20626 type = void_ftype_pint_int;
20629 gcc_unreachable ();
20632 def_builtin (d->mask, d->name, type, d->code);
20635 /* Add all builtins with variable number of operands. */
20636 for (i = 0, d = bdesc_args;
20637 i < ARRAY_SIZE (bdesc_args);
20645 switch ((enum ix86_builtin_type) d->flag)
20647 case FLOAT_FTYPE_FLOAT:
20648 type = float_ftype_float;
20650 case INT_FTYPE_V2DI_V2DI_PTEST:
20651 type = int_ftype_v2di_v2di;
20653 case INT64_FTYPE_V4SF:
20654 type = int64_ftype_v4sf;
20656 case INT64_FTYPE_V2DF:
20657 type = int64_ftype_v2df;
20659 case INT_FTYPE_V16QI:
20660 type = int_ftype_v16qi;
20662 case INT_FTYPE_V8QI:
20663 type = int_ftype_v8qi;
20665 case INT_FTYPE_V4SF:
20666 type = int_ftype_v4sf;
20668 case INT_FTYPE_V2DF:
20669 type = int_ftype_v2df;
20671 case V16QI_FTYPE_V16QI:
20672 type = v16qi_ftype_v16qi;
20674 case V8HI_FTYPE_V8HI:
20675 type = v8hi_ftype_v8hi;
20677 case V8HI_FTYPE_V16QI:
20678 type = v8hi_ftype_v16qi;
20680 case V8QI_FTYPE_V8QI:
20681 type = v8qi_ftype_v8qi;
20683 case V4SI_FTYPE_V4SI:
20684 type = v4si_ftype_v4si;
20686 case V4SI_FTYPE_V16QI:
20687 type = v4si_ftype_v16qi;
20689 case V4SI_FTYPE_V8HI:
20690 type = v4si_ftype_v8hi;
20692 case V4SI_FTYPE_V4SF:
20693 type = v4si_ftype_v4sf;
20695 case V4SI_FTYPE_V2DF:
20696 type = v4si_ftype_v2df;
20698 case V4HI_FTYPE_V4HI:
20699 type = v4hi_ftype_v4hi;
20701 case V4SF_FTYPE_V4SF:
20702 case V4SF_FTYPE_V4SF_VEC_MERGE:
20703 type = v4sf_ftype_v4sf;
20705 case V4SF_FTYPE_V4SI:
20706 type = v4sf_ftype_v4si;
20708 case V4SF_FTYPE_V2DF:
20709 type = v4sf_ftype_v2df;
20711 case V2DI_FTYPE_V2DI:
20712 type = v2di_ftype_v2di;
20714 case V2DI_FTYPE_V16QI:
20715 type = v2di_ftype_v16qi;
20717 case V2DI_FTYPE_V8HI:
20718 type = v2di_ftype_v8hi;
20720 case V2DI_FTYPE_V4SI:
20721 type = v2di_ftype_v4si;
20723 case V2SI_FTYPE_V2SI:
20724 type = v2si_ftype_v2si;
20726 case V2SI_FTYPE_V4SF:
20727 type = v2si_ftype_v4sf;
20729 case V2SI_FTYPE_V2DF:
20730 type = v2si_ftype_v2df;
20732 case V2SI_FTYPE_V2SF:
20733 type = v2si_ftype_v2sf;
20735 case V2DF_FTYPE_V4SF:
20736 type = v2df_ftype_v4sf;
20738 case V2DF_FTYPE_V2DF:
20739 case V2DF_FTYPE_V2DF_VEC_MERGE:
20740 type = v2df_ftype_v2df;
20742 case V2DF_FTYPE_V2SI:
20743 type = v2df_ftype_v2si;
20745 case V2DF_FTYPE_V4SI:
20746 type = v2df_ftype_v4si;
20748 case V2SF_FTYPE_V2SF:
20749 type = v2sf_ftype_v2sf;
20751 case V2SF_FTYPE_V2SI:
20752 type = v2sf_ftype_v2si;
20754 case V16QI_FTYPE_V16QI_V16QI:
20755 type = v16qi_ftype_v16qi_v16qi;
20757 case V16QI_FTYPE_V8HI_V8HI:
20758 type = v16qi_ftype_v8hi_v8hi;
20760 case V8QI_FTYPE_V8QI_V8QI:
20761 type = v8qi_ftype_v8qi_v8qi;
20763 case V8QI_FTYPE_V4HI_V4HI:
20764 type = v8qi_ftype_v4hi_v4hi;
20766 case V8HI_FTYPE_V8HI_V8HI:
20767 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20768 type = v8hi_ftype_v8hi_v8hi;
20770 case V8HI_FTYPE_V16QI_V16QI:
20771 type = v8hi_ftype_v16qi_v16qi;
20773 case V8HI_FTYPE_V4SI_V4SI:
20774 type = v8hi_ftype_v4si_v4si;
20776 case V8HI_FTYPE_V8HI_SI_COUNT:
20777 type = v8hi_ftype_v8hi_int;
20779 case V4SI_FTYPE_V4SI_V4SI:
20780 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20781 type = v4si_ftype_v4si_v4si;
20783 case V4SI_FTYPE_V8HI_V8HI:
20784 type = v4si_ftype_v8hi_v8hi;
20786 case V4SI_FTYPE_V4SF_V4SF:
20787 type = v4si_ftype_v4sf_v4sf;
20789 case V4SI_FTYPE_V2DF_V2DF:
20790 type = v4si_ftype_v2df_v2df;
20792 case V4SI_FTYPE_V4SI_SI_COUNT:
20793 type = v4si_ftype_v4si_int;
20795 case V4HI_FTYPE_V4HI_V4HI:
20796 case V4HI_FTYPE_V4HI_V4HI_COUNT:
20797 type = v4hi_ftype_v4hi_v4hi;
20799 case V4HI_FTYPE_V8QI_V8QI:
20800 type = v4hi_ftype_v8qi_v8qi;
20802 case V4HI_FTYPE_V2SI_V2SI:
20803 type = v4hi_ftype_v2si_v2si;
20805 case V4HI_FTYPE_V4HI_SI_COUNT:
20806 type = v4hi_ftype_v4hi_int;
20808 case V4SF_FTYPE_V4SF_V4SF:
20809 case V4SF_FTYPE_V4SF_V4SF_SWAP:
20810 type = v4sf_ftype_v4sf_v4sf;
20812 case V4SF_FTYPE_V4SF_V2SI:
20813 type = v4sf_ftype_v4sf_v2si;
20815 case V4SF_FTYPE_V4SF_V2DF:
20816 type = v4sf_ftype_v4sf_v2df;
20818 case V4SF_FTYPE_V4SF_DI:
20819 type = v4sf_ftype_v4sf_int64;
20821 case V4SF_FTYPE_V4SF_SI:
20822 type = v4sf_ftype_v4sf_int;
20824 case V2DI_FTYPE_V2DI_V2DI:
20825 case V2DI_FTYPE_V2DI_V2DI_COUNT:
20826 type = v2di_ftype_v2di_v2di;
20828 case V2DI_FTYPE_V16QI_V16QI:
20829 type = v2di_ftype_v16qi_v16qi;
20831 case V2DI_FTYPE_V4SI_V4SI:
20832 type = v2di_ftype_v4si_v4si;
20834 case V2DI_FTYPE_V2DI_V16QI:
20835 type = v2di_ftype_v2di_v16qi;
20837 case V2DI_FTYPE_V2DF_V2DF:
20838 type = v2di_ftype_v2df_v2df;
20840 case V2DI_FTYPE_V2DI_SI_COUNT:
20841 type = v2di_ftype_v2di_int;
20843 case V2SI_FTYPE_V2SI_V2SI:
20844 case V2SI_FTYPE_V2SI_V2SI_COUNT:
20845 type = v2si_ftype_v2si_v2si;
20847 case V2SI_FTYPE_V4HI_V4HI:
20848 type = v2si_ftype_v4hi_v4hi;
20850 case V2SI_FTYPE_V2SF_V2SF:
20851 type = v2si_ftype_v2sf_v2sf;
20853 case V2SI_FTYPE_V2SI_SI_COUNT:
20854 type = v2si_ftype_v2si_int;
20856 case V2DF_FTYPE_V2DF_V2DF:
20857 case V2DF_FTYPE_V2DF_V2DF_SWAP:
20858 type = v2df_ftype_v2df_v2df;
20860 case V2DF_FTYPE_V2DF_V4SF:
20861 type = v2df_ftype_v2df_v4sf;
20863 case V2DF_FTYPE_V2DF_DI:
20864 type = v2df_ftype_v2df_int64;
20866 case V2DF_FTYPE_V2DF_SI:
20867 type = v2df_ftype_v2df_int;
20869 case V2SF_FTYPE_V2SF_V2SF:
20870 type = v2sf_ftype_v2sf_v2sf;
20872 case V1DI_FTYPE_V1DI_V1DI:
20873 case V1DI_FTYPE_V1DI_V1DI_COUNT:
20874 type = v1di_ftype_v1di_v1di;
20876 case V1DI_FTYPE_V8QI_V8QI:
20877 type = v1di_ftype_v8qi_v8qi;
20879 case V1DI_FTYPE_V2SI_V2SI:
20880 type = v1di_ftype_v2si_v2si;
20882 case V1DI_FTYPE_V1DI_SI_COUNT:
20883 type = v1di_ftype_v1di_int;
20885 case UINT64_FTYPE_UINT64_UINT64:
20886 type = uint64_ftype_uint64_uint64;
20888 case UINT_FTYPE_UINT_UINT:
20889 type = unsigned_ftype_unsigned_unsigned;
20891 case UINT_FTYPE_UINT_USHORT:
20892 type = unsigned_ftype_unsigned_ushort;
20894 case UINT_FTYPE_UINT_UCHAR:
20895 type = unsigned_ftype_unsigned_uchar;
20897 case V8HI_FTYPE_V8HI_INT:
20898 type = v8hi_ftype_v8hi_int;
20900 case V4SI_FTYPE_V4SI_INT:
20901 type = v4si_ftype_v4si_int;
20903 case V4HI_FTYPE_V4HI_INT:
20904 type = v4hi_ftype_v4hi_int;
20906 case V4SF_FTYPE_V4SF_INT:
20907 type = v4sf_ftype_v4sf_int;
20909 case V2DI_FTYPE_V2DI_INT:
20910 case V2DI2TI_FTYPE_V2DI_INT:
20911 type = v2di_ftype_v2di_int;
20913 case V2DF_FTYPE_V2DF_INT:
20914 type = v2df_ftype_v2df_int;
20916 case V16QI_FTYPE_V16QI_V16QI_V16QI:
20917 type = v16qi_ftype_v16qi_v16qi_v16qi;
20919 case V4SF_FTYPE_V4SF_V4SF_V4SF:
20920 type = v4sf_ftype_v4sf_v4sf_v4sf;
20922 case V2DF_FTYPE_V2DF_V2DF_V2DF:
20923 type = v2df_ftype_v2df_v2df_v2df;
20925 case V16QI_FTYPE_V16QI_V16QI_INT:
20926 type = v16qi_ftype_v16qi_v16qi_int;
20928 case V8HI_FTYPE_V8HI_V8HI_INT:
20929 type = v8hi_ftype_v8hi_v8hi_int;
20931 case V4SI_FTYPE_V4SI_V4SI_INT:
20932 type = v4si_ftype_v4si_v4si_int;
20934 case V4SF_FTYPE_V4SF_V4SF_INT:
20935 type = v4sf_ftype_v4sf_v4sf_int;
20937 case V2DI_FTYPE_V2DI_V2DI_INT:
20938 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
20939 type = v2di_ftype_v2di_v2di_int;
20941 case V2DF_FTYPE_V2DF_V2DF_INT:
20942 type = v2df_ftype_v2df_v2df_int;
20944 case V2DI_FTYPE_V2DI_UINT_UINT:
20945 type = v2di_ftype_v2di_unsigned_unsigned;
20947 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
20948 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
20950 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
20951 type = v1di_ftype_v1di_v1di_int;
20954 gcc_unreachable ();
20957 def_builtin_const (d->mask, d->name, type, d->code);
20960 /* pcmpestr[im] insns. */
20961 for (i = 0, d = bdesc_pcmpestr;
20962 i < ARRAY_SIZE (bdesc_pcmpestr);
20965 if (d->code == IX86_BUILTIN_PCMPESTRM128)
20966 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
20968 ftype = int_ftype_v16qi_int_v16qi_int_int;
20969 def_builtin_const (d->mask, d->name, ftype, d->code);
20972 /* pcmpistr[im] insns. */
20973 for (i = 0, d = bdesc_pcmpistr;
20974 i < ARRAY_SIZE (bdesc_pcmpistr);
20977 if (d->code == IX86_BUILTIN_PCMPISTRM128)
20978 ftype = v16qi_ftype_v16qi_v16qi_int;
20980 ftype = int_ftype_v16qi_v16qi_int;
20981 def_builtin_const (d->mask, d->name, ftype, d->code);
20984 /* comi/ucomi insns. */
20985 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
20986 if (d->mask == OPTION_MASK_ISA_SSE2)
20987 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
20989 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
20992 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
20993 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
20995 /* SSE or 3DNow!A */
20996 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
20999 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
21001 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
21002 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
21005 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
21006 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
21009 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
21010 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
21011 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
21012 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
21013 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
21014 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
21017 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
21019 /* Access to the vec_init patterns. */
21020 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
21021 integer_type_node, NULL_TREE);
21022 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
21024 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
21025 short_integer_type_node,
21026 short_integer_type_node,
21027 short_integer_type_node, NULL_TREE);
21028 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
21030 ftype = build_function_type_list (V8QI_type_node, char_type_node,
21031 char_type_node, char_type_node,
21032 char_type_node, char_type_node,
21033 char_type_node, char_type_node,
21034 char_type_node, NULL_TREE);
21035 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
21037 /* Access to the vec_extract patterns. */
21038 ftype = build_function_type_list (double_type_node, V2DF_type_node,
21039 integer_type_node, NULL_TREE);
21040 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
21042 ftype = build_function_type_list (long_long_integer_type_node,
21043 V2DI_type_node, integer_type_node,
21045 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
21047 ftype = build_function_type_list (float_type_node, V4SF_type_node,
21048 integer_type_node, NULL_TREE);
21049 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
21051 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
21052 integer_type_node, NULL_TREE);
21053 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
21055 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
21056 integer_type_node, NULL_TREE);
21057 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
21059 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
21060 integer_type_node, NULL_TREE);
21061 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
21063 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
21064 integer_type_node, NULL_TREE);
21065 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
21067 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
21068 integer_type_node, NULL_TREE);
21069 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
21071 /* Access to the vec_set patterns. */
21072 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
21074 integer_type_node, NULL_TREE);
21075 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
21077 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
21079 integer_type_node, NULL_TREE);
21080 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
21082 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
21084 integer_type_node, NULL_TREE);
21085 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
21087 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
21089 integer_type_node, NULL_TREE);
21090 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
21092 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
21094 integer_type_node, NULL_TREE);
21095 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
21097 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
21099 integer_type_node, NULL_TREE);
21100 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
21102 /* Add SSE5 multi-arg argument instructions */
21103 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21105 tree mtype = NULL_TREE;
21110 switch ((enum multi_arg_type)d->flag)
21112 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
21113 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
21114 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
21115 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
21116 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
21117 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
21118 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
21119 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
21120 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
21121 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
21122 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
21123 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
21124 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
21125 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
21126 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
21127 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
21128 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
21129 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
21130 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
21131 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
21132 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
21133 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
21134 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
21135 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
21136 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
21137 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
21138 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
21139 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
21140 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
21141 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
21142 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
21143 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
21144 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
21145 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
21146 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
21147 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
21148 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
21149 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
21150 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
21151 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
21152 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
21153 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
21154 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
21155 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
21156 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
21157 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
21158 case MULTI_ARG_UNKNOWN:
21160 gcc_unreachable ();
21164 def_builtin_const (d->mask, d->name, mtype, d->code);
21168 /* Internal method for ix86_init_builtins. */
21171 ix86_init_builtins_va_builtins_abi (void)
21173 tree ms_va_ref, sysv_va_ref;
21174 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
21175 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
21176 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
21177 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
21181 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
21182 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
21183 ms_va_ref = build_reference_type (ms_va_list_type_node);
21185 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
21188 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
21189 fnvoid_va_start_ms =
21190 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
21191 fnvoid_va_end_sysv =
21192 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
21193 fnvoid_va_start_sysv =
21194 build_varargs_function_type_list (void_type_node, sysv_va_ref,
21196 fnvoid_va_copy_ms =
21197 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
21199 fnvoid_va_copy_sysv =
21200 build_function_type_list (void_type_node, sysv_va_ref,
21201 sysv_va_ref, NULL_TREE);
21203 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
21204 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
21205 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
21206 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
21207 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
21208 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
21209 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
21210 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
21211 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
21212 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
21213 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
21214 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
21218 ix86_init_builtins (void)
21220 tree float128_type_node = make_node (REAL_TYPE);
21223 /* The __float80 type. */
21224 if (TYPE_MODE (long_double_type_node) == XFmode)
21225 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
21229 /* The __float80 type. */
21230 tree float80_type_node = make_node (REAL_TYPE);
21232 TYPE_PRECISION (float80_type_node) = 80;
21233 layout_type (float80_type_node);
21234 (*lang_hooks.types.register_builtin_type) (float80_type_node,
21238 /* The __float128 type. */
21239 TYPE_PRECISION (float128_type_node) = 128;
21240 layout_type (float128_type_node);
21241 (*lang_hooks.types.register_builtin_type) (float128_type_node,
21244 /* TFmode support builtins. */
21245 ftype = build_function_type (float128_type_node, void_list_node);
21246 decl = add_builtin_function ("__builtin_infq", ftype,
21247 IX86_BUILTIN_INFQ, BUILT_IN_MD,
21249 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
21251 /* We will expand them to normal call if SSE2 isn't available since
21252 they are used by libgcc. */
21253 ftype = build_function_type_list (float128_type_node,
21254 float128_type_node,
21256 decl = add_builtin_function ("__builtin_fabsq", ftype,
21257 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
21258 "__fabstf2", NULL_TREE);
21259 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
21260 TREE_READONLY (decl) = 1;
21262 ftype = build_function_type_list (float128_type_node,
21263 float128_type_node,
21264 float128_type_node,
21266 decl = add_builtin_function ("__builtin_copysignq", ftype,
21267 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
21268 "__copysigntf3", NULL_TREE);
21269 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
21270 TREE_READONLY (decl) = 1;
21272 ix86_init_mmx_sse_builtins ();
21274 ix86_init_builtins_va_builtins_abi ();
21277 /* Errors in the source file can cause expand_expr to return const0_rtx
21278 where we expect a vector. To avoid crashing, use one of the vector
21279 clear instructions. */
21281 safe_vector_operand (rtx x, enum machine_mode mode)
21283 if (x == const0_rtx)
21284 x = CONST0_RTX (mode);
21288 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
21291 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
21294 tree arg0 = CALL_EXPR_ARG (exp, 0);
21295 tree arg1 = CALL_EXPR_ARG (exp, 1);
21296 rtx op0 = expand_normal (arg0);
21297 rtx op1 = expand_normal (arg1);
21298 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21299 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21300 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
21302 if (VECTOR_MODE_P (mode0))
21303 op0 = safe_vector_operand (op0, mode0);
21304 if (VECTOR_MODE_P (mode1))
21305 op1 = safe_vector_operand (op1, mode1);
21307 if (optimize || !target
21308 || GET_MODE (target) != tmode
21309 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21310 target = gen_reg_rtx (tmode);
21312 if (GET_MODE (op1) == SImode && mode1 == TImode)
21314 rtx x = gen_reg_rtx (V4SImode);
21315 emit_insn (gen_sse2_loadd (x, op1));
21316 op1 = gen_lowpart (TImode, x);
21319 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21320 op0 = copy_to_mode_reg (mode0, op0);
21321 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21322 op1 = copy_to_mode_reg (mode1, op1);
21324 pat = GEN_FCN (icode) (target, op0, op1);
21333 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
21336 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
21337 enum multi_arg_type m_type,
21338 enum insn_code sub_code)
21343 bool comparison_p = false;
21345 bool last_arg_constant = false;
21346 int num_memory = 0;
21349 enum machine_mode mode;
21352 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21356 case MULTI_ARG_3_SF:
21357 case MULTI_ARG_3_DF:
21358 case MULTI_ARG_3_DI:
21359 case MULTI_ARG_3_SI:
21360 case MULTI_ARG_3_SI_DI:
21361 case MULTI_ARG_3_HI:
21362 case MULTI_ARG_3_HI_SI:
21363 case MULTI_ARG_3_QI:
21364 case MULTI_ARG_3_PERMPS:
21365 case MULTI_ARG_3_PERMPD:
21369 case MULTI_ARG_2_SF:
21370 case MULTI_ARG_2_DF:
21371 case MULTI_ARG_2_DI:
21372 case MULTI_ARG_2_SI:
21373 case MULTI_ARG_2_HI:
21374 case MULTI_ARG_2_QI:
21378 case MULTI_ARG_2_DI_IMM:
21379 case MULTI_ARG_2_SI_IMM:
21380 case MULTI_ARG_2_HI_IMM:
21381 case MULTI_ARG_2_QI_IMM:
21383 last_arg_constant = true;
21386 case MULTI_ARG_1_SF:
21387 case MULTI_ARG_1_DF:
21388 case MULTI_ARG_1_DI:
21389 case MULTI_ARG_1_SI:
21390 case MULTI_ARG_1_HI:
21391 case MULTI_ARG_1_QI:
21392 case MULTI_ARG_1_SI_DI:
21393 case MULTI_ARG_1_HI_DI:
21394 case MULTI_ARG_1_HI_SI:
21395 case MULTI_ARG_1_QI_DI:
21396 case MULTI_ARG_1_QI_SI:
21397 case MULTI_ARG_1_QI_HI:
21398 case MULTI_ARG_1_PH2PS:
21399 case MULTI_ARG_1_PS2PH:
21403 case MULTI_ARG_2_SF_CMP:
21404 case MULTI_ARG_2_DF_CMP:
21405 case MULTI_ARG_2_DI_CMP:
21406 case MULTI_ARG_2_SI_CMP:
21407 case MULTI_ARG_2_HI_CMP:
21408 case MULTI_ARG_2_QI_CMP:
21410 comparison_p = true;
21413 case MULTI_ARG_2_SF_TF:
21414 case MULTI_ARG_2_DF_TF:
21415 case MULTI_ARG_2_DI_TF:
21416 case MULTI_ARG_2_SI_TF:
21417 case MULTI_ARG_2_HI_TF:
21418 case MULTI_ARG_2_QI_TF:
21423 case MULTI_ARG_UNKNOWN:
21425 gcc_unreachable ();
21428 if (optimize || !target
21429 || GET_MODE (target) != tmode
21430 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21431 target = gen_reg_rtx (tmode);
21433 gcc_assert (nargs <= 4);
21435 for (i = 0; i < nargs; i++)
21437 tree arg = CALL_EXPR_ARG (exp, i);
21438 rtx op = expand_normal (arg);
21439 int adjust = (comparison_p) ? 1 : 0;
21440 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
21442 if (last_arg_constant && i == nargs-1)
21444 if (GET_CODE (op) != CONST_INT)
21446 error ("last argument must be an immediate");
21447 return gen_reg_rtx (tmode);
21452 if (VECTOR_MODE_P (mode))
21453 op = safe_vector_operand (op, mode);
21455 /* If we aren't optimizing, only allow one memory operand to be
21457 if (memory_operand (op, mode))
21460 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
21463 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
21465 op = force_reg (mode, op);
21469 args[i].mode = mode;
21475 pat = GEN_FCN (icode) (target, args[0].op);
21480 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
21481 GEN_INT ((int)sub_code));
21482 else if (! comparison_p)
21483 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21486 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
21490 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
21495 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
21499 gcc_unreachable ();
21509 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
21510 insns with vec_merge. */
21513 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
21517 tree arg0 = CALL_EXPR_ARG (exp, 0);
21518 rtx op1, op0 = expand_normal (arg0);
21519 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21520 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21522 if (optimize || !target
21523 || GET_MODE (target) != tmode
21524 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21525 target = gen_reg_rtx (tmode);
21527 if (VECTOR_MODE_P (mode0))
21528 op0 = safe_vector_operand (op0, mode0);
21530 if ((optimize && !register_operand (op0, mode0))
21531 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21532 op0 = copy_to_mode_reg (mode0, op0);
21535 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
21536 op1 = copy_to_mode_reg (mode0, op1);
21538 pat = GEN_FCN (icode) (target, op0, op1);
21545 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
21548 ix86_expand_sse_compare (const struct builtin_description *d,
21549 tree exp, rtx target, bool swap)
21552 tree arg0 = CALL_EXPR_ARG (exp, 0);
21553 tree arg1 = CALL_EXPR_ARG (exp, 1);
21554 rtx op0 = expand_normal (arg0);
21555 rtx op1 = expand_normal (arg1);
21557 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
21558 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
21559 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
21560 enum rtx_code comparison = d->comparison;
21562 if (VECTOR_MODE_P (mode0))
21563 op0 = safe_vector_operand (op0, mode0);
21564 if (VECTOR_MODE_P (mode1))
21565 op1 = safe_vector_operand (op1, mode1);
21567 /* Swap operands if we have a comparison that isn't available in
21571 rtx tmp = gen_reg_rtx (mode1);
21572 emit_move_insn (tmp, op1);
21577 if (optimize || !target
21578 || GET_MODE (target) != tmode
21579 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
21580 target = gen_reg_rtx (tmode);
21582 if ((optimize && !register_operand (op0, mode0))
21583 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
21584 op0 = copy_to_mode_reg (mode0, op0);
21585 if ((optimize && !register_operand (op1, mode1))
21586 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
21587 op1 = copy_to_mode_reg (mode1, op1);
21589 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
21590 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
21597 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
21600 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
21604 tree arg0 = CALL_EXPR_ARG (exp, 0);
21605 tree arg1 = CALL_EXPR_ARG (exp, 1);
21606 rtx op0 = expand_normal (arg0);
21607 rtx op1 = expand_normal (arg1);
21608 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
21609 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
21610 enum rtx_code comparison = d->comparison;
21612 if (VECTOR_MODE_P (mode0))
21613 op0 = safe_vector_operand (op0, mode0);
21614 if (VECTOR_MODE_P (mode1))
21615 op1 = safe_vector_operand (op1, mode1);
21617 /* Swap operands if we have a comparison that isn't available in
21619 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
21626 target = gen_reg_rtx (SImode);
21627 emit_move_insn (target, const0_rtx);
21628 target = gen_rtx_SUBREG (QImode, target, 0);
21630 if ((optimize && !register_operand (op0, mode0))
21631 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
21632 op0 = copy_to_mode_reg (mode0, op0);
21633 if ((optimize && !register_operand (op1, mode1))
21634 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
21635 op1 = copy_to_mode_reg (mode1, op1);
21637 pat = GEN_FCN (d->icode) (op0, op1);
21641 emit_insn (gen_rtx_SET (VOIDmode,
21642 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21643 gen_rtx_fmt_ee (comparison, QImode,
21647 return SUBREG_REG (target);
21650 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
21653 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
21657 tree arg0 = CALL_EXPR_ARG (exp, 0);
21658 tree arg1 = CALL_EXPR_ARG (exp, 1);
21659 rtx op0 = expand_normal (arg0);
21660 rtx op1 = expand_normal (arg1);
21661 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
21662 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
21663 enum rtx_code comparison = d->comparison;
21665 if (VECTOR_MODE_P (mode0))
21666 op0 = safe_vector_operand (op0, mode0);
21667 if (VECTOR_MODE_P (mode1))
21668 op1 = safe_vector_operand (op1, mode1);
21670 target = gen_reg_rtx (SImode);
21671 emit_move_insn (target, const0_rtx);
21672 target = gen_rtx_SUBREG (QImode, target, 0);
21674 if ((optimize && !register_operand (op0, mode0))
21675 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
21676 op0 = copy_to_mode_reg (mode0, op0);
21677 if ((optimize && !register_operand (op1, mode1))
21678 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
21679 op1 = copy_to_mode_reg (mode1, op1);
21681 pat = GEN_FCN (d->icode) (op0, op1);
21685 emit_insn (gen_rtx_SET (VOIDmode,
21686 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21687 gen_rtx_fmt_ee (comparison, QImode,
21691 return SUBREG_REG (target);
21694 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
21697 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
21698 tree exp, rtx target)
21701 tree arg0 = CALL_EXPR_ARG (exp, 0);
21702 tree arg1 = CALL_EXPR_ARG (exp, 1);
21703 tree arg2 = CALL_EXPR_ARG (exp, 2);
21704 tree arg3 = CALL_EXPR_ARG (exp, 3);
21705 tree arg4 = CALL_EXPR_ARG (exp, 4);
21706 rtx scratch0, scratch1;
21707 rtx op0 = expand_normal (arg0);
21708 rtx op1 = expand_normal (arg1);
21709 rtx op2 = expand_normal (arg2);
21710 rtx op3 = expand_normal (arg3);
21711 rtx op4 = expand_normal (arg4);
21712 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
21714 tmode0 = insn_data[d->icode].operand[0].mode;
21715 tmode1 = insn_data[d->icode].operand[1].mode;
21716 modev2 = insn_data[d->icode].operand[2].mode;
21717 modei3 = insn_data[d->icode].operand[3].mode;
21718 modev4 = insn_data[d->icode].operand[4].mode;
21719 modei5 = insn_data[d->icode].operand[5].mode;
21720 modeimm = insn_data[d->icode].operand[6].mode;
21722 if (VECTOR_MODE_P (modev2))
21723 op0 = safe_vector_operand (op0, modev2);
21724 if (VECTOR_MODE_P (modev4))
21725 op2 = safe_vector_operand (op2, modev4);
21727 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
21728 op0 = copy_to_mode_reg (modev2, op0);
21729 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
21730 op1 = copy_to_mode_reg (modei3, op1);
21731 if ((optimize && !register_operand (op2, modev4))
21732 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
21733 op2 = copy_to_mode_reg (modev4, op2);
21734 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
21735 op3 = copy_to_mode_reg (modei5, op3);
21737 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
21739 error ("the fifth argument must be a 8-bit immediate");
21743 if (d->code == IX86_BUILTIN_PCMPESTRI128)
21745 if (optimize || !target
21746 || GET_MODE (target) != tmode0
21747 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
21748 target = gen_reg_rtx (tmode0);
21750 scratch1 = gen_reg_rtx (tmode1);
21752 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
21754 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
21756 if (optimize || !target
21757 || GET_MODE (target) != tmode1
21758 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
21759 target = gen_reg_rtx (tmode1);
21761 scratch0 = gen_reg_rtx (tmode0);
21763 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
21767 gcc_assert (d->flag);
21769 scratch0 = gen_reg_rtx (tmode0);
21770 scratch1 = gen_reg_rtx (tmode1);
21772 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
21782 target = gen_reg_rtx (SImode);
21783 emit_move_insn (target, const0_rtx);
21784 target = gen_rtx_SUBREG (QImode, target, 0);
21787 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21788 gen_rtx_fmt_ee (EQ, QImode,
21789 gen_rtx_REG ((enum machine_mode) d->flag,
21792 return SUBREG_REG (target);
21799 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
21802 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
21803 tree exp, rtx target)
21806 tree arg0 = CALL_EXPR_ARG (exp, 0);
21807 tree arg1 = CALL_EXPR_ARG (exp, 1);
21808 tree arg2 = CALL_EXPR_ARG (exp, 2);
21809 rtx scratch0, scratch1;
21810 rtx op0 = expand_normal (arg0);
21811 rtx op1 = expand_normal (arg1);
21812 rtx op2 = expand_normal (arg2);
21813 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
21815 tmode0 = insn_data[d->icode].operand[0].mode;
21816 tmode1 = insn_data[d->icode].operand[1].mode;
21817 modev2 = insn_data[d->icode].operand[2].mode;
21818 modev3 = insn_data[d->icode].operand[3].mode;
21819 modeimm = insn_data[d->icode].operand[4].mode;
21821 if (VECTOR_MODE_P (modev2))
21822 op0 = safe_vector_operand (op0, modev2);
21823 if (VECTOR_MODE_P (modev3))
21824 op1 = safe_vector_operand (op1, modev3);
21826 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
21827 op0 = copy_to_mode_reg (modev2, op0);
21828 if ((optimize && !register_operand (op1, modev3))
21829 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
21830 op1 = copy_to_mode_reg (modev3, op1);
21832 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
21834 error ("the third argument must be a 8-bit immediate");
21838 if (d->code == IX86_BUILTIN_PCMPISTRI128)
21840 if (optimize || !target
21841 || GET_MODE (target) != tmode0
21842 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
21843 target = gen_reg_rtx (tmode0);
21845 scratch1 = gen_reg_rtx (tmode1);
21847 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
21849 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
21851 if (optimize || !target
21852 || GET_MODE (target) != tmode1
21853 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
21854 target = gen_reg_rtx (tmode1);
21856 scratch0 = gen_reg_rtx (tmode0);
21858 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
21862 gcc_assert (d->flag);
21864 scratch0 = gen_reg_rtx (tmode0);
21865 scratch1 = gen_reg_rtx (tmode1);
21867 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
21877 target = gen_reg_rtx (SImode);
21878 emit_move_insn (target, const0_rtx);
21879 target = gen_rtx_SUBREG (QImode, target, 0);
21882 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21883 gen_rtx_fmt_ee (EQ, QImode,
21884 gen_rtx_REG ((enum machine_mode) d->flag,
21887 return SUBREG_REG (target);
21893 /* Subroutine of ix86_expand_builtin to take care of insns with
21894 variable number of operands. */
21897 ix86_expand_args_builtin (const struct builtin_description *d,
21898 tree exp, rtx target)
21900 rtx pat, real_target;
21901 unsigned int i, nargs;
21902 unsigned int nargs_constant = 0;
21903 int num_memory = 0;
21907 enum machine_mode mode;
21909 bool last_arg_count = false;
21910 enum insn_code icode = d->icode;
21911 const struct insn_data *insn_p = &insn_data[icode];
21912 enum machine_mode tmode = insn_p->operand[0].mode;
21913 enum machine_mode rmode = VOIDmode;
21915 enum rtx_code comparison = d->comparison;
21917 switch ((enum ix86_builtin_type) d->flag)
21919 case INT_FTYPE_V2DI_V2DI_PTEST:
21920 return ix86_expand_sse_ptest (d, exp, target);
21921 case FLOAT128_FTYPE_FLOAT128:
21922 case FLOAT_FTYPE_FLOAT:
21923 case INT64_FTYPE_V4SF:
21924 case INT64_FTYPE_V2DF:
21925 case INT_FTYPE_V16QI:
21926 case INT_FTYPE_V8QI:
21927 case INT_FTYPE_V4SF:
21928 case INT_FTYPE_V2DF:
21929 case V16QI_FTYPE_V16QI:
21930 case V8HI_FTYPE_V8HI:
21931 case V8HI_FTYPE_V16QI:
21932 case V8QI_FTYPE_V8QI:
21933 case V4SI_FTYPE_V4SI:
21934 case V4SI_FTYPE_V16QI:
21935 case V4SI_FTYPE_V4SF:
21936 case V4SI_FTYPE_V8HI:
21937 case V4SI_FTYPE_V2DF:
21938 case V4HI_FTYPE_V4HI:
21939 case V4SF_FTYPE_V4SF:
21940 case V4SF_FTYPE_V4SI:
21941 case V4SF_FTYPE_V2DF:
21942 case V2DI_FTYPE_V2DI:
21943 case V2DI_FTYPE_V16QI:
21944 case V2DI_FTYPE_V8HI:
21945 case V2DI_FTYPE_V4SI:
21946 case V2DF_FTYPE_V2DF:
21947 case V2DF_FTYPE_V4SI:
21948 case V2DF_FTYPE_V4SF:
21949 case V2DF_FTYPE_V2SI:
21950 case V2SI_FTYPE_V2SI:
21951 case V2SI_FTYPE_V4SF:
21952 case V2SI_FTYPE_V2SF:
21953 case V2SI_FTYPE_V2DF:
21954 case V2SF_FTYPE_V2SF:
21955 case V2SF_FTYPE_V2SI:
21958 case V4SF_FTYPE_V4SF_VEC_MERGE:
21959 case V2DF_FTYPE_V2DF_VEC_MERGE:
21960 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
21961 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
21962 case V16QI_FTYPE_V16QI_V16QI:
21963 case V16QI_FTYPE_V8HI_V8HI:
21964 case V8QI_FTYPE_V8QI_V8QI:
21965 case V8QI_FTYPE_V4HI_V4HI:
21966 case V8HI_FTYPE_V8HI_V8HI:
21967 case V8HI_FTYPE_V16QI_V16QI:
21968 case V8HI_FTYPE_V4SI_V4SI:
21969 case V4SI_FTYPE_V4SI_V4SI:
21970 case V4SI_FTYPE_V8HI_V8HI:
21971 case V4SI_FTYPE_V4SF_V4SF:
21972 case V4SI_FTYPE_V2DF_V2DF:
21973 case V4HI_FTYPE_V4HI_V4HI:
21974 case V4HI_FTYPE_V8QI_V8QI:
21975 case V4HI_FTYPE_V2SI_V2SI:
21976 case V4SF_FTYPE_V4SF_V4SF:
21977 case V4SF_FTYPE_V4SF_V2SI:
21978 case V4SF_FTYPE_V4SF_V2DF:
21979 case V4SF_FTYPE_V4SF_DI:
21980 case V4SF_FTYPE_V4SF_SI:
21981 case V2DI_FTYPE_V2DI_V2DI:
21982 case V2DI_FTYPE_V16QI_V16QI:
21983 case V2DI_FTYPE_V4SI_V4SI:
21984 case V2DI_FTYPE_V2DI_V16QI:
21985 case V2DI_FTYPE_V2DF_V2DF:
21986 case V2SI_FTYPE_V2SI_V2SI:
21987 case V2SI_FTYPE_V4HI_V4HI:
21988 case V2SI_FTYPE_V2SF_V2SF:
21989 case V2DF_FTYPE_V2DF_V2DF:
21990 case V2DF_FTYPE_V2DF_V4SF:
21991 case V2DF_FTYPE_V2DF_DI:
21992 case V2DF_FTYPE_V2DF_SI:
21993 case V2SF_FTYPE_V2SF_V2SF:
21994 case V1DI_FTYPE_V1DI_V1DI:
21995 case V1DI_FTYPE_V8QI_V8QI:
21996 case V1DI_FTYPE_V2SI_V2SI:
21997 if (comparison == UNKNOWN)
21998 return ix86_expand_binop_builtin (icode, exp, target);
22001 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22002 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22003 gcc_assert (comparison != UNKNOWN);
22007 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22008 case V8HI_FTYPE_V8HI_SI_COUNT:
22009 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22010 case V4SI_FTYPE_V4SI_SI_COUNT:
22011 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22012 case V4HI_FTYPE_V4HI_SI_COUNT:
22013 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22014 case V2DI_FTYPE_V2DI_SI_COUNT:
22015 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22016 case V2SI_FTYPE_V2SI_SI_COUNT:
22017 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22018 case V1DI_FTYPE_V1DI_SI_COUNT:
22020 last_arg_count = true;
22022 case UINT64_FTYPE_UINT64_UINT64:
22023 case UINT_FTYPE_UINT_UINT:
22024 case UINT_FTYPE_UINT_USHORT:
22025 case UINT_FTYPE_UINT_UCHAR:
22028 case V2DI2TI_FTYPE_V2DI_INT:
22031 nargs_constant = 1;
22033 case V8HI_FTYPE_V8HI_INT:
22034 case V4SI_FTYPE_V4SI_INT:
22035 case V4HI_FTYPE_V4HI_INT:
22036 case V4SF_FTYPE_V4SF_INT:
22037 case V2DI_FTYPE_V2DI_INT:
22038 case V2DF_FTYPE_V2DF_INT:
22040 nargs_constant = 1;
22042 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22043 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22044 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22047 case V16QI_FTYPE_V16QI_V16QI_INT:
22048 case V8HI_FTYPE_V8HI_V8HI_INT:
22049 case V4SI_FTYPE_V4SI_V4SI_INT:
22050 case V4SF_FTYPE_V4SF_V4SF_INT:
22051 case V2DI_FTYPE_V2DI_V2DI_INT:
22052 case V2DF_FTYPE_V2DF_V2DF_INT:
22054 nargs_constant = 1;
22056 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22059 nargs_constant = 1;
22061 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22064 nargs_constant = 1;
22066 case V2DI_FTYPE_V2DI_UINT_UINT:
22068 nargs_constant = 2;
22070 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22072 nargs_constant = 2;
22075 gcc_unreachable ();
22078 gcc_assert (nargs <= ARRAY_SIZE (args));
22080 if (comparison != UNKNOWN)
22082 gcc_assert (nargs == 2);
22083 return ix86_expand_sse_compare (d, exp, target, swap);
22086 if (rmode == VOIDmode || rmode == tmode)
22090 || GET_MODE (target) != tmode
22091 || ! (*insn_p->operand[0].predicate) (target, tmode))
22092 target = gen_reg_rtx (tmode);
22093 real_target = target;
22097 target = gen_reg_rtx (rmode);
22098 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
22101 for (i = 0; i < nargs; i++)
22103 tree arg = CALL_EXPR_ARG (exp, i);
22104 rtx op = expand_normal (arg);
22105 enum machine_mode mode = insn_p->operand[i + 1].mode;
22106 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
22108 if (last_arg_count && (i + 1) == nargs)
22110 /* SIMD shift insns take either an 8-bit immediate or
22111 register as count. But builtin functions take int as
22112 count. If count doesn't match, we put it in register. */
22115 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
22116 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
22117 op = copy_to_reg (op);
22120 else if ((nargs - i) <= nargs_constant)
22125 case CODE_FOR_sse4_1_roundpd:
22126 case CODE_FOR_sse4_1_roundps:
22127 case CODE_FOR_sse4_1_roundsd:
22128 case CODE_FOR_sse4_1_roundss:
22129 case CODE_FOR_sse4_1_blendps:
22130 error ("the last argument must be a 4-bit immediate");
22133 case CODE_FOR_sse4_1_blendpd:
22134 error ("the last argument must be a 2-bit immediate");
22138 switch (nargs_constant)
22141 if ((nargs - i) == nargs_constant)
22143 error ("the next to last argument must be an 8-bit immediate");
22147 error ("the last argument must be an 8-bit immediate");
22150 gcc_unreachable ();
22157 if (VECTOR_MODE_P (mode))
22158 op = safe_vector_operand (op, mode);
22160 /* If we aren't optimizing, only allow one memory operand to
22162 if (memory_operand (op, mode))
22165 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
22167 if (optimize || !match || num_memory > 1)
22168 op = copy_to_mode_reg (mode, op);
22172 op = copy_to_reg (op);
22173 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
22178 args[i].mode = mode;
22184 pat = GEN_FCN (icode) (real_target, args[0].op);
22187 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
22190 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
22194 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
22195 args[2].op, args[3].op);
22198 gcc_unreachable ();
22208 /* Subroutine of ix86_expand_builtin to take care of special insns
22209 with variable number of operands. */
22212 ix86_expand_special_args_builtin (const struct builtin_description *d,
22213 tree exp, rtx target)
22217 unsigned int i, nargs, arg_adjust, memory;
22221 enum machine_mode mode;
22223 enum insn_code icode = d->icode;
22224 bool last_arg_constant = false;
22225 const struct insn_data *insn_p = &insn_data[icode];
22226 enum machine_mode tmode = insn_p->operand[0].mode;
22227 enum { load, store } class;
22229 switch ((enum ix86_special_builtin_type) d->flag)
22231 case VOID_FTYPE_VOID:
22232 emit_insn (GEN_FCN (icode) (target));
22234 case V2DI_FTYPE_PV2DI:
22235 case V16QI_FTYPE_PCCHAR:
22236 case V4SF_FTYPE_PCFLOAT:
22237 case V2DF_FTYPE_PCDOUBLE:
22242 case VOID_FTYPE_PV2SF_V4SF:
22243 case VOID_FTYPE_PV2DI_V2DI:
22244 case VOID_FTYPE_PCHAR_V16QI:
22245 case VOID_FTYPE_PFLOAT_V4SF:
22246 case VOID_FTYPE_PDOUBLE_V2DF:
22247 case VOID_FTYPE_PDI_DI:
22248 case VOID_FTYPE_PINT_INT:
22251 /* Reserve memory operand for target. */
22252 memory = ARRAY_SIZE (args);
22254 case V4SF_FTYPE_V4SF_PCV2SF:
22255 case V2DF_FTYPE_V2DF_PCDOUBLE:
22261 gcc_unreachable ();
22264 gcc_assert (nargs <= ARRAY_SIZE (args));
22266 if (class == store)
22268 arg = CALL_EXPR_ARG (exp, 0);
22269 op = expand_normal (arg);
22270 gcc_assert (target == 0);
22271 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
22279 || GET_MODE (target) != tmode
22280 || ! (*insn_p->operand[0].predicate) (target, tmode))
22281 target = gen_reg_rtx (tmode);
22284 for (i = 0; i < nargs; i++)
22286 enum machine_mode mode = insn_p->operand[i + 1].mode;
22289 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
22290 op = expand_normal (arg);
22291 match = (*insn_p->operand[i + 1].predicate) (op, mode);
22293 if (last_arg_constant && (i + 1) == nargs)
22299 error ("the last argument must be an 8-bit immediate");
22307 /* This must be the memory operand. */
22308 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
22309 gcc_assert (GET_MODE (op) == mode
22310 || GET_MODE (op) == VOIDmode);
22314 /* This must be register. */
22315 if (VECTOR_MODE_P (mode))
22316 op = safe_vector_operand (op, mode);
22318 gcc_assert (GET_MODE (op) == mode
22319 || GET_MODE (op) == VOIDmode);
22320 op = copy_to_mode_reg (mode, op);
22325 args[i].mode = mode;
22331 pat = GEN_FCN (icode) (target, args[0].op);
22334 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
22337 gcc_unreachable ();
22343 return class == store ? 0 : target;
22346 /* Return the integer constant in ARG. Constrain it to be in the range
22347 of the subparts of VEC_TYPE; issue an error if not. */
22350 get_element_number (tree vec_type, tree arg)
22352 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
22354 if (!host_integerp (arg, 1)
22355 || (elt = tree_low_cst (arg, 1), elt > max))
22357 error ("selector must be an integer constant in the range 0..%wi", max);
22364 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
22365 ix86_expand_vector_init. We DO have language-level syntax for this, in
22366 the form of (type){ init-list }. Except that since we can't place emms
22367 instructions from inside the compiler, we can't allow the use of MMX
22368 registers unless the user explicitly asks for it. So we do *not* define
22369 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
22370 we have builtins invoked by mmintrin.h that gives us license to emit
22371 these sorts of instructions. */
22374 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
22376 enum machine_mode tmode = TYPE_MODE (type);
22377 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
22378 int i, n_elt = GET_MODE_NUNITS (tmode);
22379 rtvec v = rtvec_alloc (n_elt);
22381 gcc_assert (VECTOR_MODE_P (tmode));
22382 gcc_assert (call_expr_nargs (exp) == n_elt);
22384 for (i = 0; i < n_elt; ++i)
22386 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
22387 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
22390 if (!target || !register_operand (target, tmode))
22391 target = gen_reg_rtx (tmode);
22393 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
22397 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
22398 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
22399 had a language-level syntax for referencing vector elements. */
22402 ix86_expand_vec_ext_builtin (tree exp, rtx target)
22404 enum machine_mode tmode, mode0;
22409 arg0 = CALL_EXPR_ARG (exp, 0);
22410 arg1 = CALL_EXPR_ARG (exp, 1);
22412 op0 = expand_normal (arg0);
22413 elt = get_element_number (TREE_TYPE (arg0), arg1);
22415 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
22416 mode0 = TYPE_MODE (TREE_TYPE (arg0));
22417 gcc_assert (VECTOR_MODE_P (mode0));
22419 op0 = force_reg (mode0, op0);
22421 if (optimize || !target || !register_operand (target, tmode))
22422 target = gen_reg_rtx (tmode);
22424 ix86_expand_vector_extract (true, target, op0, elt);
22429 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
22430 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
22431 a language-level syntax for referencing vector elements. */
22434 ix86_expand_vec_set_builtin (tree exp)
22436 enum machine_mode tmode, mode1;
22437 tree arg0, arg1, arg2;
22439 rtx op0, op1, target;
22441 arg0 = CALL_EXPR_ARG (exp, 0);
22442 arg1 = CALL_EXPR_ARG (exp, 1);
22443 arg2 = CALL_EXPR_ARG (exp, 2);
22445 tmode = TYPE_MODE (TREE_TYPE (arg0));
22446 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
22447 gcc_assert (VECTOR_MODE_P (tmode));
22449 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
22450 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
22451 elt = get_element_number (TREE_TYPE (arg0), arg2);
22453 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
22454 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
22456 op0 = force_reg (tmode, op0);
22457 op1 = force_reg (mode1, op1);
22459 /* OP0 is the source of these builtin functions and shouldn't be
22460 modified. Create a copy, use it and return it as target. */
22461 target = gen_reg_rtx (tmode);
22462 emit_move_insn (target, op0);
22463 ix86_expand_vector_set (true, target, op1, elt);
22468 /* Expand an expression EXP that calls a built-in function,
22469 with result going to TARGET if that's convenient
22470 (and in mode MODE if that's convenient).
22471 SUBTARGET may be used as the target for computing one of EXP's operands.
22472 IGNORE is nonzero if the value is to be ignored. */
22475 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
22476 enum machine_mode mode ATTRIBUTE_UNUSED,
22477 int ignore ATTRIBUTE_UNUSED)
22479 const struct builtin_description *d;
22481 enum insn_code icode;
22482 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
22483 tree arg0, arg1, arg2;
22484 rtx op0, op1, op2, pat;
22485 enum machine_mode mode0, mode1, mode2;
22486 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
22488 /* Determine whether the builtin function is available under the current ISA.
22489 Originally the builtin was not created if it wasn't applicable to the
22490 current ISA based on the command line switches. With function specific
22491 options, we need to check in the context of the function making the call
22492 whether it is supported. */
22493 if (ix86_builtins_isa[fcode]
22494 && !(ix86_builtins_isa[fcode] & ix86_isa_flags))
22496 char *opts = ix86_target_string (ix86_builtins_isa[fcode], 0, NULL,
22497 NULL, NULL, false);
22500 error ("%qE needs unknown isa option", fndecl);
22503 gcc_assert (opts != NULL);
22504 error ("%qE needs isa option %s", fndecl, opts);
22512 case IX86_BUILTIN_MASKMOVQ:
22513 case IX86_BUILTIN_MASKMOVDQU:
22514 icode = (fcode == IX86_BUILTIN_MASKMOVQ
22515 ? CODE_FOR_mmx_maskmovq
22516 : CODE_FOR_sse2_maskmovdqu);
22517 /* Note the arg order is different from the operand order. */
22518 arg1 = CALL_EXPR_ARG (exp, 0);
22519 arg2 = CALL_EXPR_ARG (exp, 1);
22520 arg0 = CALL_EXPR_ARG (exp, 2);
22521 op0 = expand_normal (arg0);
22522 op1 = expand_normal (arg1);
22523 op2 = expand_normal (arg2);
22524 mode0 = insn_data[icode].operand[0].mode;
22525 mode1 = insn_data[icode].operand[1].mode;
22526 mode2 = insn_data[icode].operand[2].mode;
22528 op0 = force_reg (Pmode, op0);
22529 op0 = gen_rtx_MEM (mode1, op0);
22531 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
22532 op0 = copy_to_mode_reg (mode0, op0);
22533 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
22534 op1 = copy_to_mode_reg (mode1, op1);
22535 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
22536 op2 = copy_to_mode_reg (mode2, op2);
22537 pat = GEN_FCN (icode) (op0, op1, op2);
22543 case IX86_BUILTIN_LDMXCSR:
22544 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
22545 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
22546 emit_move_insn (target, op0);
22547 emit_insn (gen_sse_ldmxcsr (target));
22550 case IX86_BUILTIN_STMXCSR:
22551 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
22552 emit_insn (gen_sse_stmxcsr (target));
22553 return copy_to_mode_reg (SImode, target);
22555 case IX86_BUILTIN_CLFLUSH:
22556 arg0 = CALL_EXPR_ARG (exp, 0);
22557 op0 = expand_normal (arg0);
22558 icode = CODE_FOR_sse2_clflush;
22559 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
22560 op0 = copy_to_mode_reg (Pmode, op0);
22562 emit_insn (gen_sse2_clflush (op0));
22565 case IX86_BUILTIN_MONITOR:
22566 arg0 = CALL_EXPR_ARG (exp, 0);
22567 arg1 = CALL_EXPR_ARG (exp, 1);
22568 arg2 = CALL_EXPR_ARG (exp, 2);
22569 op0 = expand_normal (arg0);
22570 op1 = expand_normal (arg1);
22571 op2 = expand_normal (arg2);
22573 op0 = copy_to_mode_reg (Pmode, op0);
22575 op1 = copy_to_mode_reg (SImode, op1);
22577 op2 = copy_to_mode_reg (SImode, op2);
22578 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
22581 case IX86_BUILTIN_MWAIT:
22582 arg0 = CALL_EXPR_ARG (exp, 0);
22583 arg1 = CALL_EXPR_ARG (exp, 1);
22584 op0 = expand_normal (arg0);
22585 op1 = expand_normal (arg1);
22587 op0 = copy_to_mode_reg (SImode, op0);
22589 op1 = copy_to_mode_reg (SImode, op1);
22590 emit_insn (gen_sse3_mwait (op0, op1));
22593 case IX86_BUILTIN_VEC_INIT_V2SI:
22594 case IX86_BUILTIN_VEC_INIT_V4HI:
22595 case IX86_BUILTIN_VEC_INIT_V8QI:
22596 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
22598 case IX86_BUILTIN_VEC_EXT_V2DF:
22599 case IX86_BUILTIN_VEC_EXT_V2DI:
22600 case IX86_BUILTIN_VEC_EXT_V4SF:
22601 case IX86_BUILTIN_VEC_EXT_V4SI:
22602 case IX86_BUILTIN_VEC_EXT_V8HI:
22603 case IX86_BUILTIN_VEC_EXT_V2SI:
22604 case IX86_BUILTIN_VEC_EXT_V4HI:
22605 case IX86_BUILTIN_VEC_EXT_V16QI:
22606 return ix86_expand_vec_ext_builtin (exp, target);
22608 case IX86_BUILTIN_VEC_SET_V2DI:
22609 case IX86_BUILTIN_VEC_SET_V4SF:
22610 case IX86_BUILTIN_VEC_SET_V4SI:
22611 case IX86_BUILTIN_VEC_SET_V8HI:
22612 case IX86_BUILTIN_VEC_SET_V4HI:
22613 case IX86_BUILTIN_VEC_SET_V16QI:
22614 return ix86_expand_vec_set_builtin (exp);
22616 case IX86_BUILTIN_INFQ:
22618 REAL_VALUE_TYPE inf;
22622 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
22624 tmp = validize_mem (force_const_mem (mode, tmp));
22627 target = gen_reg_rtx (mode);
22629 emit_move_insn (target, tmp);
22637 for (i = 0, d = bdesc_special_args;
22638 i < ARRAY_SIZE (bdesc_special_args);
22640 if (d->code == fcode)
22641 return ix86_expand_special_args_builtin (d, exp, target);
22643 for (i = 0, d = bdesc_args;
22644 i < ARRAY_SIZE (bdesc_args);
22646 if (d->code == fcode)
22649 case IX86_BUILTIN_FABSQ:
22650 case IX86_BUILTIN_COPYSIGNQ:
22652 /* Emit a normal call if SSE2 isn't available. */
22653 return expand_call (exp, target, ignore);
22655 return ix86_expand_args_builtin (d, exp, target);
22658 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22659 if (d->code == fcode)
22660 return ix86_expand_sse_comi (d, exp, target);
22662 for (i = 0, d = bdesc_pcmpestr;
22663 i < ARRAY_SIZE (bdesc_pcmpestr);
22665 if (d->code == fcode)
22666 return ix86_expand_sse_pcmpestr (d, exp, target);
22668 for (i = 0, d = bdesc_pcmpistr;
22669 i < ARRAY_SIZE (bdesc_pcmpistr);
22671 if (d->code == fcode)
22672 return ix86_expand_sse_pcmpistr (d, exp, target);
22674 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22675 if (d->code == fcode)
22676 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
22677 (enum multi_arg_type)d->flag,
22680 gcc_unreachable ();
22683 /* Returns a function decl for a vectorized version of the builtin function
22684 with builtin function code FN and the result vector type TYPE, or NULL_TREE
22685 if it is not available. */
22688 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
22691 enum machine_mode in_mode, out_mode;
22694 if (TREE_CODE (type_out) != VECTOR_TYPE
22695 || TREE_CODE (type_in) != VECTOR_TYPE)
22698 out_mode = TYPE_MODE (TREE_TYPE (type_out));
22699 out_n = TYPE_VECTOR_SUBPARTS (type_out);
22700 in_mode = TYPE_MODE (TREE_TYPE (type_in));
22701 in_n = TYPE_VECTOR_SUBPARTS (type_in);
22705 case BUILT_IN_SQRT:
22706 if (out_mode == DFmode && out_n == 2
22707 && in_mode == DFmode && in_n == 2)
22708 return ix86_builtins[IX86_BUILTIN_SQRTPD];
22711 case BUILT_IN_SQRTF:
22712 if (out_mode == SFmode && out_n == 4
22713 && in_mode == SFmode && in_n == 4)
22714 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
22717 case BUILT_IN_LRINT:
22718 if (out_mode == SImode && out_n == 4
22719 && in_mode == DFmode && in_n == 2)
22720 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
22723 case BUILT_IN_LRINTF:
22724 if (out_mode == SImode && out_n == 4
22725 && in_mode == SFmode && in_n == 4)
22726 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
22733 /* Dispatch to a handler for a vectorization library. */
22734 if (ix86_veclib_handler)
22735 return (*ix86_veclib_handler)(fn, type_out, type_in);
22740 /* Handler for an SVML-style interface to
22741 a library with vectorized intrinsics. */
22744 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
22747 tree fntype, new_fndecl, args;
22750 enum machine_mode el_mode, in_mode;
22753 /* The SVML is suitable for unsafe math only. */
22754 if (!flag_unsafe_math_optimizations)
22757 el_mode = TYPE_MODE (TREE_TYPE (type_out));
22758 n = TYPE_VECTOR_SUBPARTS (type_out);
22759 in_mode = TYPE_MODE (TREE_TYPE (type_in));
22760 in_n = TYPE_VECTOR_SUBPARTS (type_in);
22761 if (el_mode != in_mode
22769 case BUILT_IN_LOG10:
22771 case BUILT_IN_TANH:
22773 case BUILT_IN_ATAN:
22774 case BUILT_IN_ATAN2:
22775 case BUILT_IN_ATANH:
22776 case BUILT_IN_CBRT:
22777 case BUILT_IN_SINH:
22779 case BUILT_IN_ASINH:
22780 case BUILT_IN_ASIN:
22781 case BUILT_IN_COSH:
22783 case BUILT_IN_ACOSH:
22784 case BUILT_IN_ACOS:
22785 if (el_mode != DFmode || n != 2)
22789 case BUILT_IN_EXPF:
22790 case BUILT_IN_LOGF:
22791 case BUILT_IN_LOG10F:
22792 case BUILT_IN_POWF:
22793 case BUILT_IN_TANHF:
22794 case BUILT_IN_TANF:
22795 case BUILT_IN_ATANF:
22796 case BUILT_IN_ATAN2F:
22797 case BUILT_IN_ATANHF:
22798 case BUILT_IN_CBRTF:
22799 case BUILT_IN_SINHF:
22800 case BUILT_IN_SINF:
22801 case BUILT_IN_ASINHF:
22802 case BUILT_IN_ASINF:
22803 case BUILT_IN_COSHF:
22804 case BUILT_IN_COSF:
22805 case BUILT_IN_ACOSHF:
22806 case BUILT_IN_ACOSF:
22807 if (el_mode != SFmode || n != 4)
22815 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
22817 if (fn == BUILT_IN_LOGF)
22818 strcpy (name, "vmlsLn4");
22819 else if (fn == BUILT_IN_LOG)
22820 strcpy (name, "vmldLn2");
22823 sprintf (name, "vmls%s", bname+10);
22824 name[strlen (name)-1] = '4';
22827 sprintf (name, "vmld%s2", bname+10);
22829 /* Convert to uppercase. */
22833 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
22834 args = TREE_CHAIN (args))
22838 fntype = build_function_type_list (type_out, type_in, NULL);
22840 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
22842 /* Build a function declaration for the vectorized function. */
22843 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
22844 TREE_PUBLIC (new_fndecl) = 1;
22845 DECL_EXTERNAL (new_fndecl) = 1;
22846 DECL_IS_NOVOPS (new_fndecl) = 1;
22847 TREE_READONLY (new_fndecl) = 1;
22852 /* Handler for an ACML-style interface to
22853 a library with vectorized intrinsics. */
22856 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
22858 char name[20] = "__vr.._";
22859 tree fntype, new_fndecl, args;
22862 enum machine_mode el_mode, in_mode;
22865 /* The ACML is 64bits only and suitable for unsafe math only as
22866 it does not correctly support parts of IEEE with the required
22867 precision such as denormals. */
22869 || !flag_unsafe_math_optimizations)
22872 el_mode = TYPE_MODE (TREE_TYPE (type_out));
22873 n = TYPE_VECTOR_SUBPARTS (type_out);
22874 in_mode = TYPE_MODE (TREE_TYPE (type_in));
22875 in_n = TYPE_VECTOR_SUBPARTS (type_in);
22876 if (el_mode != in_mode
22886 case BUILT_IN_LOG2:
22887 case BUILT_IN_LOG10:
22890 if (el_mode != DFmode
22895 case BUILT_IN_SINF:
22896 case BUILT_IN_COSF:
22897 case BUILT_IN_EXPF:
22898 case BUILT_IN_POWF:
22899 case BUILT_IN_LOGF:
22900 case BUILT_IN_LOG2F:
22901 case BUILT_IN_LOG10F:
22904 if (el_mode != SFmode
22913 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
22914 sprintf (name + 7, "%s", bname+10);
22917 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
22918 args = TREE_CHAIN (args))
22922 fntype = build_function_type_list (type_out, type_in, NULL);
22924 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
22926 /* Build a function declaration for the vectorized function. */
22927 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
22928 TREE_PUBLIC (new_fndecl) = 1;
22929 DECL_EXTERNAL (new_fndecl) = 1;
22930 DECL_IS_NOVOPS (new_fndecl) = 1;
22931 TREE_READONLY (new_fndecl) = 1;
22937 /* Returns a decl of a function that implements conversion of the
22938 input vector of type TYPE, or NULL_TREE if it is not available. */
22941 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
22943 if (TREE_CODE (type) != VECTOR_TYPE)
22949 switch (TYPE_MODE (type))
22952 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
22957 case FIX_TRUNC_EXPR:
22958 switch (TYPE_MODE (type))
22961 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
22971 /* Returns a code for a target-specific builtin that implements
22972 reciprocal of the function, or NULL_TREE if not available. */
22975 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
22976 bool sqrt ATTRIBUTE_UNUSED)
22978 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
22979 && flag_finite_math_only && !flag_trapping_math
22980 && flag_unsafe_math_optimizations))
22984 /* Machine dependent builtins. */
22987 /* Vectorized version of sqrt to rsqrt conversion. */
22988 case IX86_BUILTIN_SQRTPS_NR:
22989 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
22995 /* Normal builtins. */
22998 /* Sqrt to rsqrt conversion. */
22999 case BUILT_IN_SQRTF:
23000 return ix86_builtins[IX86_BUILTIN_RSQRTF];
23007 /* Store OPERAND to the memory after reload is completed. This means
23008 that we can't easily use assign_stack_local. */
23010 ix86_force_to_memory (enum machine_mode mode, rtx operand)
23014 gcc_assert (reload_completed);
23015 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
23017 result = gen_rtx_MEM (mode,
23018 gen_rtx_PLUS (Pmode,
23020 GEN_INT (-RED_ZONE_SIZE)));
23021 emit_move_insn (result, operand);
23023 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
23029 operand = gen_lowpart (DImode, operand);
23033 gen_rtx_SET (VOIDmode,
23034 gen_rtx_MEM (DImode,
23035 gen_rtx_PRE_DEC (DImode,
23036 stack_pointer_rtx)),
23040 gcc_unreachable ();
23042 result = gen_rtx_MEM (mode, stack_pointer_rtx);
23051 split_di (&operand, 1, operands, operands + 1);
23053 gen_rtx_SET (VOIDmode,
23054 gen_rtx_MEM (SImode,
23055 gen_rtx_PRE_DEC (Pmode,
23056 stack_pointer_rtx)),
23059 gen_rtx_SET (VOIDmode,
23060 gen_rtx_MEM (SImode,
23061 gen_rtx_PRE_DEC (Pmode,
23062 stack_pointer_rtx)),
23067 /* Store HImodes as SImodes. */
23068 operand = gen_lowpart (SImode, operand);
23072 gen_rtx_SET (VOIDmode,
23073 gen_rtx_MEM (GET_MODE (operand),
23074 gen_rtx_PRE_DEC (SImode,
23075 stack_pointer_rtx)),
23079 gcc_unreachable ();
23081 result = gen_rtx_MEM (mode, stack_pointer_rtx);
23086 /* Free operand from the memory. */
23088 ix86_free_from_memory (enum machine_mode mode)
23090 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
23094 if (mode == DImode || TARGET_64BIT)
23098 /* Use LEA to deallocate stack space. In peephole2 it will be converted
23099 to pop or add instruction if registers are available. */
23100 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
23101 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
23106 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
23107 QImode must go into class Q_REGS.
23108 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
23109 movdf to do mem-to-mem moves through integer regs. */
23111 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
23113 enum machine_mode mode = GET_MODE (x);
23115 /* We're only allowed to return a subclass of CLASS. Many of the
23116 following checks fail for NO_REGS, so eliminate that early. */
23117 if (regclass == NO_REGS)
23120 /* All classes can load zeros. */
23121 if (x == CONST0_RTX (mode))
23124 /* Force constants into memory if we are loading a (nonzero) constant into
23125 an MMX or SSE register. This is because there are no MMX/SSE instructions
23126 to load from a constant. */
23128 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
23131 /* Prefer SSE regs only, if we can use them for math. */
23132 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
23133 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
23135 /* Floating-point constants need more complex checks. */
23136 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
23138 /* General regs can load everything. */
23139 if (reg_class_subset_p (regclass, GENERAL_REGS))
23142 /* Floats can load 0 and 1 plus some others. Note that we eliminated
23143 zero above. We only want to wind up preferring 80387 registers if
23144 we plan on doing computation with them. */
23146 && standard_80387_constant_p (x))
23148 /* Limit class to non-sse. */
23149 if (regclass == FLOAT_SSE_REGS)
23151 if (regclass == FP_TOP_SSE_REGS)
23153 if (regclass == FP_SECOND_SSE_REGS)
23154 return FP_SECOND_REG;
23155 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
23162 /* Generally when we see PLUS here, it's the function invariant
23163 (plus soft-fp const_int). Which can only be computed into general
23165 if (GET_CODE (x) == PLUS)
23166 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
23168 /* QImode constants are easy to load, but non-constant QImode data
23169 must go into Q_REGS. */
23170 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
23172 if (reg_class_subset_p (regclass, Q_REGS))
23174 if (reg_class_subset_p (Q_REGS, regclass))
23182 /* Discourage putting floating-point values in SSE registers unless
23183 SSE math is being used, and likewise for the 387 registers. */
23185 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
23187 enum machine_mode mode = GET_MODE (x);
23189 /* Restrict the output reload class to the register bank that we are doing
23190 math on. If we would like not to return a subset of CLASS, reject this
23191 alternative: if reload cannot do this, it will still use its choice. */
23192 mode = GET_MODE (x);
23193 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
23194 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
23196 if (X87_FLOAT_MODE_P (mode))
23198 if (regclass == FP_TOP_SSE_REGS)
23200 else if (regclass == FP_SECOND_SSE_REGS)
23201 return FP_SECOND_REG;
23203 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
23209 static enum reg_class
23210 ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
23211 enum machine_mode mode,
23212 secondary_reload_info *sri ATTRIBUTE_UNUSED)
23214 /* QImode spills from non-QI registers require
23215 intermediate register on 32bit targets. */
23216 if (!in_p && mode == QImode && !TARGET_64BIT
23217 && (class == GENERAL_REGS
23218 || class == LEGACY_REGS
23219 || class == INDEX_REGS))
23228 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
23229 regno = true_regnum (x);
23231 /* Return Q_REGS if the operand is in memory. */
23239 /* If we are copying between general and FP registers, we need a memory
23240 location. The same is true for SSE and MMX registers.
23242 To optimize register_move_cost performance, allow inline variant.
23244 The macro can't work reliably when one of the CLASSES is class containing
23245 registers from multiple units (SSE, MMX, integer). We avoid this by never
23246 combining those units in single alternative in the machine description.
23247 Ensure that this constraint holds to avoid unexpected surprises.
23249 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
23250 enforce these sanity checks. */
23253 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
23254 enum machine_mode mode, int strict)
23256 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
23257 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
23258 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
23259 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
23260 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
23261 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
23263 gcc_assert (!strict);
23267 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
23270 /* ??? This is a lie. We do have moves between mmx/general, and for
23271 mmx/sse2. But by saying we need secondary memory we discourage the
23272 register allocator from using the mmx registers unless needed. */
23273 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
23276 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
23278 /* SSE1 doesn't have any direct moves from other classes. */
23282 /* If the target says that inter-unit moves are more expensive
23283 than moving through memory, then don't generate them. */
23284 if (!TARGET_INTER_UNIT_MOVES)
23287 /* Between SSE and general, we have moves no larger than word size. */
23288 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23296 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
23297 enum machine_mode mode, int strict)
23299 return inline_secondary_memory_needed (class1, class2, mode, strict);
23302 /* Return true if the registers in CLASS cannot represent the change from
23303 modes FROM to TO. */
23306 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
23307 enum reg_class regclass)
23312 /* x87 registers can't do subreg at all, as all values are reformatted
23313 to extended precision. */
23314 if (MAYBE_FLOAT_CLASS_P (regclass))
23317 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
23319 /* Vector registers do not support QI or HImode loads. If we don't
23320 disallow a change to these modes, reload will assume it's ok to
23321 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
23322 the vec_dupv4hi pattern. */
23323 if (GET_MODE_SIZE (from) < 4)
23326 /* Vector registers do not support subreg with nonzero offsets, which
23327 are otherwise valid for integer registers. Since we can't see
23328 whether we have a nonzero offset from here, prohibit all
23329 nonparadoxical subregs changing size. */
23330 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
23337 /* Return the cost of moving data of mode M between a
23338 register and memory. A value of 2 is the default; this cost is
23339 relative to those in `REGISTER_MOVE_COST'.
23341 This function is used extensively by register_move_cost that is used to
23342 build tables at startup. Make it inline in this case.
23343 When IN is 2, return maximum of in and out move cost.
23345 If moving between registers and memory is more expensive than
23346 between two registers, you should define this macro to express the
23349 Model also increased moving costs of QImode registers in non
23353 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
23357 if (FLOAT_CLASS_P (regclass))
23375 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
23376 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
23378 if (SSE_CLASS_P (regclass))
23381 switch (GET_MODE_SIZE (mode))
23396 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
23397 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
23399 if (MMX_CLASS_P (regclass))
23402 switch (GET_MODE_SIZE (mode))
23414 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
23415 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
23417 switch (GET_MODE_SIZE (mode))
23420 if (Q_CLASS_P (regclass) || TARGET_64BIT)
23423 return ix86_cost->int_store[0];
23424 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
23425 cost = ix86_cost->movzbl_load;
23427 cost = ix86_cost->int_load[0];
23429 return MAX (cost, ix86_cost->int_store[0]);
23435 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
23437 return ix86_cost->movzbl_load;
23439 return ix86_cost->int_store[0] + 4;
23444 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
23445 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
23447 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
23448 if (mode == TFmode)
23451 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
23453 cost = ix86_cost->int_load[2];
23455 cost = ix86_cost->int_store[2];
23456 return (cost * (((int) GET_MODE_SIZE (mode)
23457 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
23462 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
23464 return inline_memory_move_cost (mode, regclass, in);
23468 /* Return the cost of moving data from a register in class CLASS1 to
23469 one in class CLASS2.
23471 It is not required that the cost always equal 2 when FROM is the same as TO;
23472 on some machines it is expensive to move between registers if they are not
23473 general registers. */
23476 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
23477 enum reg_class class2)
23479 /* In case we require secondary memory, compute cost of the store followed
23480 by load. In order to avoid bad register allocation choices, we need
23481 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
23483 if (inline_secondary_memory_needed (class1, class2, mode, 0))
23487 cost += inline_memory_move_cost (mode, class1, 2);
23488 cost += inline_memory_move_cost (mode, class2, 2);
23490 /* In case of copying from general_purpose_register we may emit multiple
23491 stores followed by single load causing memory size mismatch stall.
23492 Count this as arbitrarily high cost of 20. */
23493 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
23496 /* In the case of FP/MMX moves, the registers actually overlap, and we
23497 have to switch modes in order to treat them differently. */
23498 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
23499 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
23505 /* Moves between SSE/MMX and integer unit are expensive. */
23506 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
23507 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
23509 /* ??? By keeping returned value relatively high, we limit the number
23510 of moves between integer and MMX/SSE registers for all targets.
23511 Additionally, high value prevents problem with x86_modes_tieable_p(),
23512 where integer modes in MMX/SSE registers are not tieable
23513 because of missing QImode and HImode moves to, from or between
23514 MMX/SSE registers. */
23515 return MAX (8, ix86_cost->mmxsse_to_integer);
23517 if (MAYBE_FLOAT_CLASS_P (class1))
23518 return ix86_cost->fp_move;
23519 if (MAYBE_SSE_CLASS_P (class1))
23520 return ix86_cost->sse_move;
23521 if (MAYBE_MMX_CLASS_P (class1))
23522 return ix86_cost->mmx_move;
23526 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
23529 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
23531 /* Flags and only flags can only hold CCmode values. */
23532 if (CC_REGNO_P (regno))
23533 return GET_MODE_CLASS (mode) == MODE_CC;
23534 if (GET_MODE_CLASS (mode) == MODE_CC
23535 || GET_MODE_CLASS (mode) == MODE_RANDOM
23536 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
23538 if (FP_REGNO_P (regno))
23539 return VALID_FP_MODE_P (mode);
23540 if (SSE_REGNO_P (regno))
23542 /* We implement the move patterns for all vector modes into and
23543 out of SSE registers, even when no operation instructions
23545 return (VALID_SSE_REG_MODE (mode)
23546 || VALID_SSE2_REG_MODE (mode)
23547 || VALID_MMX_REG_MODE (mode)
23548 || VALID_MMX_REG_MODE_3DNOW (mode));
23550 if (MMX_REGNO_P (regno))
23552 /* We implement the move patterns for 3DNOW modes even in MMX mode,
23553 so if the register is available at all, then we can move data of
23554 the given mode into or out of it. */
23555 return (VALID_MMX_REG_MODE (mode)
23556 || VALID_MMX_REG_MODE_3DNOW (mode));
23559 if (mode == QImode)
23561 /* Take care for QImode values - they can be in non-QI regs,
23562 but then they do cause partial register stalls. */
23563 if (regno < 4 || TARGET_64BIT)
23565 if (!TARGET_PARTIAL_REG_STALL)
23567 return reload_in_progress || reload_completed;
23569 /* We handle both integer and floats in the general purpose registers. */
23570 else if (VALID_INT_MODE_P (mode))
23572 else if (VALID_FP_MODE_P (mode))
23574 else if (VALID_DFP_MODE_P (mode))
23576 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
23577 on to use that value in smaller contexts, this can easily force a
23578 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
23579 supporting DImode, allow it. */
23580 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
23586 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
23587 tieable integer mode. */
23590 ix86_tieable_integer_mode_p (enum machine_mode mode)
23599 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
23602 return TARGET_64BIT;
23609 /* Return true if MODE1 is accessible in a register that can hold MODE2
23610 without copying. That is, all register classes that can hold MODE2
23611 can also hold MODE1. */
23614 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
23616 if (mode1 == mode2)
23619 if (ix86_tieable_integer_mode_p (mode1)
23620 && ix86_tieable_integer_mode_p (mode2))
23623 /* MODE2 being XFmode implies fp stack or general regs, which means we
23624 can tie any smaller floating point modes to it. Note that we do not
23625 tie this with TFmode. */
23626 if (mode2 == XFmode)
23627 return mode1 == SFmode || mode1 == DFmode;
23629 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
23630 that we can tie it with SFmode. */
23631 if (mode2 == DFmode)
23632 return mode1 == SFmode;
23634 /* If MODE2 is only appropriate for an SSE register, then tie with
23635 any other mode acceptable to SSE registers. */
23636 if (GET_MODE_SIZE (mode2) == 16
23637 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
23638 return (GET_MODE_SIZE (mode1) == 16
23639 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
23641 /* If MODE2 is appropriate for an MMX register, then tie
23642 with any other mode acceptable to MMX registers. */
23643 if (GET_MODE_SIZE (mode2) == 8
23644 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
23645 return (GET_MODE_SIZE (mode1) == 8
23646 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
23651 /* Compute a (partial) cost for rtx X. Return true if the complete
23652 cost has been computed, and false if subexpressions should be
23653 scanned. In either case, *TOTAL contains the cost result. */
23656 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
23658 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
23659 enum machine_mode mode = GET_MODE (x);
23667 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
23669 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
23671 else if (flag_pic && SYMBOLIC_CONST (x)
23673 || (!GET_CODE (x) != LABEL_REF
23674 && (GET_CODE (x) != SYMBOL_REF
23675 || !SYMBOL_REF_LOCAL_P (x)))))
23682 if (mode == VOIDmode)
23685 switch (standard_80387_constant_p (x))
23690 default: /* Other constants */
23695 /* Start with (MEM (SYMBOL_REF)), since that's where
23696 it'll probably end up. Add a penalty for size. */
23697 *total = (COSTS_N_INSNS (1)
23698 + (flag_pic != 0 && !TARGET_64BIT)
23699 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
23705 /* The zero extensions is often completely free on x86_64, so make
23706 it as cheap as possible. */
23707 if (TARGET_64BIT && mode == DImode
23708 && GET_MODE (XEXP (x, 0)) == SImode)
23710 else if (TARGET_ZERO_EXTEND_WITH_AND)
23711 *total = ix86_cost->add;
23713 *total = ix86_cost->movzx;
23717 *total = ix86_cost->movsx;
23721 if (CONST_INT_P (XEXP (x, 1))
23722 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
23724 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
23727 *total = ix86_cost->add;
23730 if ((value == 2 || value == 3)
23731 && ix86_cost->lea <= ix86_cost->shift_const)
23733 *total = ix86_cost->lea;
23743 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
23745 if (CONST_INT_P (XEXP (x, 1)))
23747 if (INTVAL (XEXP (x, 1)) > 32)
23748 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
23750 *total = ix86_cost->shift_const * 2;
23754 if (GET_CODE (XEXP (x, 1)) == AND)
23755 *total = ix86_cost->shift_var * 2;
23757 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
23762 if (CONST_INT_P (XEXP (x, 1)))
23763 *total = ix86_cost->shift_const;
23765 *total = ix86_cost->shift_var;
23770 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23772 /* ??? SSE scalar cost should be used here. */
23773 *total = ix86_cost->fmul;
23776 else if (X87_FLOAT_MODE_P (mode))
23778 *total = ix86_cost->fmul;
23781 else if (FLOAT_MODE_P (mode))
23783 /* ??? SSE vector cost should be used here. */
23784 *total = ix86_cost->fmul;
23789 rtx op0 = XEXP (x, 0);
23790 rtx op1 = XEXP (x, 1);
23792 if (CONST_INT_P (XEXP (x, 1)))
23794 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
23795 for (nbits = 0; value != 0; value &= value - 1)
23799 /* This is arbitrary. */
23802 /* Compute costs correctly for widening multiplication. */
23803 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
23804 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
23805 == GET_MODE_SIZE (mode))
23807 int is_mulwiden = 0;
23808 enum machine_mode inner_mode = GET_MODE (op0);
23810 if (GET_CODE (op0) == GET_CODE (op1))
23811 is_mulwiden = 1, op1 = XEXP (op1, 0);
23812 else if (CONST_INT_P (op1))
23814 if (GET_CODE (op0) == SIGN_EXTEND)
23815 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
23818 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
23822 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
23825 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
23826 + nbits * ix86_cost->mult_bit
23827 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
23836 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23837 /* ??? SSE cost should be used here. */
23838 *total = ix86_cost->fdiv;
23839 else if (X87_FLOAT_MODE_P (mode))
23840 *total = ix86_cost->fdiv;
23841 else if (FLOAT_MODE_P (mode))
23842 /* ??? SSE vector cost should be used here. */
23843 *total = ix86_cost->fdiv;
23845 *total = ix86_cost->divide[MODE_INDEX (mode)];
23849 if (GET_MODE_CLASS (mode) == MODE_INT
23850 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
23852 if (GET_CODE (XEXP (x, 0)) == PLUS
23853 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
23854 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
23855 && CONSTANT_P (XEXP (x, 1)))
23857 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
23858 if (val == 2 || val == 4 || val == 8)
23860 *total = ix86_cost->lea;
23861 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
23862 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
23864 *total += rtx_cost (XEXP (x, 1), outer_code);
23868 else if (GET_CODE (XEXP (x, 0)) == MULT
23869 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
23871 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
23872 if (val == 2 || val == 4 || val == 8)
23874 *total = ix86_cost->lea;
23875 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
23876 *total += rtx_cost (XEXP (x, 1), outer_code);
23880 else if (GET_CODE (XEXP (x, 0)) == PLUS)
23882 *total = ix86_cost->lea;
23883 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
23884 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
23885 *total += rtx_cost (XEXP (x, 1), outer_code);
23892 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23894 /* ??? SSE cost should be used here. */
23895 *total = ix86_cost->fadd;
23898 else if (X87_FLOAT_MODE_P (mode))
23900 *total = ix86_cost->fadd;
23903 else if (FLOAT_MODE_P (mode))
23905 /* ??? SSE vector cost should be used here. */
23906 *total = ix86_cost->fadd;
23914 if (!TARGET_64BIT && mode == DImode)
23916 *total = (ix86_cost->add * 2
23917 + (rtx_cost (XEXP (x, 0), outer_code)
23918 << (GET_MODE (XEXP (x, 0)) != DImode))
23919 + (rtx_cost (XEXP (x, 1), outer_code)
23920 << (GET_MODE (XEXP (x, 1)) != DImode)));
23926 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23928 /* ??? SSE cost should be used here. */
23929 *total = ix86_cost->fchs;
23932 else if (X87_FLOAT_MODE_P (mode))
23934 *total = ix86_cost->fchs;
23937 else if (FLOAT_MODE_P (mode))
23939 /* ??? SSE vector cost should be used here. */
23940 *total = ix86_cost->fchs;
23946 if (!TARGET_64BIT && mode == DImode)
23947 *total = ix86_cost->add * 2;
23949 *total = ix86_cost->add;
23953 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
23954 && XEXP (XEXP (x, 0), 1) == const1_rtx
23955 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
23956 && XEXP (x, 1) == const0_rtx)
23958 /* This kind of construct is implemented using test[bwl].
23959 Treat it as if we had an AND. */
23960 *total = (ix86_cost->add
23961 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
23962 + rtx_cost (const1_rtx, outer_code));
23968 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
23973 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23974 /* ??? SSE cost should be used here. */
23975 *total = ix86_cost->fabs;
23976 else if (X87_FLOAT_MODE_P (mode))
23977 *total = ix86_cost->fabs;
23978 else if (FLOAT_MODE_P (mode))
23979 /* ??? SSE vector cost should be used here. */
23980 *total = ix86_cost->fabs;
23984 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23985 /* ??? SSE cost should be used here. */
23986 *total = ix86_cost->fsqrt;
23987 else if (X87_FLOAT_MODE_P (mode))
23988 *total = ix86_cost->fsqrt;
23989 else if (FLOAT_MODE_P (mode))
23990 /* ??? SSE vector cost should be used here. */
23991 *total = ix86_cost->fsqrt;
23995 if (XINT (x, 1) == UNSPEC_TP)
24006 static int current_machopic_label_num;
24008 /* Given a symbol name and its associated stub, write out the
24009 definition of the stub. */
24012 machopic_output_stub (FILE *file, const char *symb, const char *stub)
24014 unsigned int length;
24015 char *binder_name, *symbol_name, lazy_ptr_name[32];
24016 int label = ++current_machopic_label_num;
24018 /* For 64-bit we shouldn't get here. */
24019 gcc_assert (!TARGET_64BIT);
24021 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
24022 symb = (*targetm.strip_name_encoding) (symb);
24024 length = strlen (stub);
24025 binder_name = XALLOCAVEC (char, length + 32);
24026 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
24028 length = strlen (symb);
24029 symbol_name = XALLOCAVEC (char, length + 32);
24030 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
24032 sprintf (lazy_ptr_name, "L%d$lz", label);
24035 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
24037 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
24039 fprintf (file, "%s:\n", stub);
24040 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
24044 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
24045 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
24046 fprintf (file, "\tjmp\t*%%edx\n");
24049 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
24051 fprintf (file, "%s:\n", binder_name);
24055 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
24056 fprintf (file, "\tpushl\t%%eax\n");
24059 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
24061 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
24063 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
24064 fprintf (file, "%s:\n", lazy_ptr_name);
24065 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
24066 fprintf (file, "\t.long %s\n", binder_name);
24070 darwin_x86_file_end (void)
24072 darwin_file_end ();
24075 #endif /* TARGET_MACHO */
24077 /* Order the registers for register allocator. */
24080 x86_order_regs_for_local_alloc (void)
24085 /* First allocate the local general purpose registers. */
24086 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
24087 if (GENERAL_REGNO_P (i) && call_used_regs[i])
24088 reg_alloc_order [pos++] = i;
24090 /* Global general purpose registers. */
24091 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
24092 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
24093 reg_alloc_order [pos++] = i;
24095 /* x87 registers come first in case we are doing FP math
24097 if (!TARGET_SSE_MATH)
24098 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
24099 reg_alloc_order [pos++] = i;
24101 /* SSE registers. */
24102 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
24103 reg_alloc_order [pos++] = i;
24104 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
24105 reg_alloc_order [pos++] = i;
24107 /* x87 registers. */
24108 if (TARGET_SSE_MATH)
24109 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
24110 reg_alloc_order [pos++] = i;
24112 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
24113 reg_alloc_order [pos++] = i;
24115 /* Initialize the rest of array as we do not allocate some registers
24117 while (pos < FIRST_PSEUDO_REGISTER)
24118 reg_alloc_order [pos++] = 0;
24121 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
24122 struct attribute_spec.handler. */
24124 ix86_handle_abi_attribute (tree *node, tree name,
24125 tree args ATTRIBUTE_UNUSED,
24126 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
24128 if (TREE_CODE (*node) != FUNCTION_TYPE
24129 && TREE_CODE (*node) != METHOD_TYPE
24130 && TREE_CODE (*node) != FIELD_DECL
24131 && TREE_CODE (*node) != TYPE_DECL)
24133 warning (OPT_Wattributes, "%qs attribute only applies to functions",
24134 IDENTIFIER_POINTER (name));
24135 *no_add_attrs = true;
24140 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
24141 IDENTIFIER_POINTER (name));
24142 *no_add_attrs = true;
24146 /* Can combine regparm with all attributes but fastcall. */
24147 if (is_attribute_p ("ms_abi", name))
24149 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
24151 error ("ms_abi and sysv_abi attributes are not compatible");
24156 else if (is_attribute_p ("sysv_abi", name))
24158 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
24160 error ("ms_abi and sysv_abi attributes are not compatible");
24169 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
24170 struct attribute_spec.handler. */
24172 ix86_handle_struct_attribute (tree *node, tree name,
24173 tree args ATTRIBUTE_UNUSED,
24174 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
24177 if (DECL_P (*node))
24179 if (TREE_CODE (*node) == TYPE_DECL)
24180 type = &TREE_TYPE (*node);
24185 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
24186 || TREE_CODE (*type) == UNION_TYPE)))
24188 warning (OPT_Wattributes, "%qs attribute ignored",
24189 IDENTIFIER_POINTER (name));
24190 *no_add_attrs = true;
24193 else if ((is_attribute_p ("ms_struct", name)
24194 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
24195 || ((is_attribute_p ("gcc_struct", name)
24196 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
24198 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
24199 IDENTIFIER_POINTER (name));
24200 *no_add_attrs = true;
24207 ix86_ms_bitfield_layout_p (const_tree record_type)
24209 return (TARGET_MS_BITFIELD_LAYOUT &&
24210 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
24211 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
24214 /* Returns an expression indicating where the this parameter is
24215 located on entry to the FUNCTION. */
24218 x86_this_parameter (tree function)
24220 tree type = TREE_TYPE (function);
24221 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
24226 const int *parm_regs;
24228 if (ix86_function_type_abi (type) == MS_ABI)
24229 parm_regs = x86_64_ms_abi_int_parameter_registers;
24231 parm_regs = x86_64_int_parameter_registers;
24232 return gen_rtx_REG (DImode, parm_regs[aggr]);
24235 nregs = ix86_function_regparm (type, function);
24237 if (nregs > 0 && !stdarg_p (type))
24241 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
24242 regno = aggr ? DX_REG : CX_REG;
24250 return gen_rtx_MEM (SImode,
24251 plus_constant (stack_pointer_rtx, 4));
24254 return gen_rtx_REG (SImode, regno);
24257 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
24260 /* Determine whether x86_output_mi_thunk can succeed. */
24263 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
24264 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
24265 HOST_WIDE_INT vcall_offset, const_tree function)
24267 /* 64-bit can handle anything. */
24271 /* For 32-bit, everything's fine if we have one free register. */
24272 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
24275 /* Need a free register for vcall_offset. */
24279 /* Need a free register for GOT references. */
24280 if (flag_pic && !(*targetm.binds_local_p) (function))
24283 /* Otherwise ok. */
24287 /* Output the assembler code for a thunk function. THUNK_DECL is the
24288 declaration for the thunk function itself, FUNCTION is the decl for
24289 the target function. DELTA is an immediate constant offset to be
24290 added to THIS. If VCALL_OFFSET is nonzero, the word at
24291 *(*this + vcall_offset) should be added to THIS. */
24294 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
24295 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
24296 HOST_WIDE_INT vcall_offset, tree function)
24299 rtx this_param = x86_this_parameter (function);
24302 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
24303 pull it in now and let DELTA benefit. */
24304 if (REG_P (this_param))
24305 this_reg = this_param;
24306 else if (vcall_offset)
24308 /* Put the this parameter into %eax. */
24309 xops[0] = this_param;
24310 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
24311 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
24314 this_reg = NULL_RTX;
24316 /* Adjust the this parameter by a fixed constant. */
24319 xops[0] = GEN_INT (delta);
24320 xops[1] = this_reg ? this_reg : this_param;
24323 if (!x86_64_general_operand (xops[0], DImode))
24325 tmp = gen_rtx_REG (DImode, R10_REG);
24327 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
24329 xops[1] = this_param;
24331 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
24334 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
24337 /* Adjust the this parameter by a value stored in the vtable. */
24341 tmp = gen_rtx_REG (DImode, R10_REG);
24344 int tmp_regno = CX_REG;
24345 if (lookup_attribute ("fastcall",
24346 TYPE_ATTRIBUTES (TREE_TYPE (function))))
24347 tmp_regno = AX_REG;
24348 tmp = gen_rtx_REG (SImode, tmp_regno);
24351 xops[0] = gen_rtx_MEM (Pmode, this_reg);
24353 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
24355 /* Adjust the this parameter. */
24356 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
24357 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
24359 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
24360 xops[0] = GEN_INT (vcall_offset);
24362 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
24363 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
24365 xops[1] = this_reg;
24366 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
24369 /* If necessary, drop THIS back to its stack slot. */
24370 if (this_reg && this_reg != this_param)
24372 xops[0] = this_reg;
24373 xops[1] = this_param;
24374 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
24377 xops[0] = XEXP (DECL_RTL (function), 0);
24380 if (!flag_pic || (*targetm.binds_local_p) (function))
24381 output_asm_insn ("jmp\t%P0", xops);
24382 /* All thunks should be in the same object as their target,
24383 and thus binds_local_p should be true. */
24384 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
24385 gcc_unreachable ();
24388 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
24389 tmp = gen_rtx_CONST (Pmode, tmp);
24390 tmp = gen_rtx_MEM (QImode, tmp);
24392 output_asm_insn ("jmp\t%A0", xops);
24397 if (!flag_pic || (*targetm.binds_local_p) (function))
24398 output_asm_insn ("jmp\t%P0", xops);
24403 rtx sym_ref = XEXP (DECL_RTL (function), 0);
24404 tmp = (gen_rtx_SYMBOL_REF
24406 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
24407 tmp = gen_rtx_MEM (QImode, tmp);
24409 output_asm_insn ("jmp\t%0", xops);
24412 #endif /* TARGET_MACHO */
24414 tmp = gen_rtx_REG (SImode, CX_REG);
24415 output_set_got (tmp, NULL_RTX);
24418 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
24419 output_asm_insn ("jmp\t{*}%1", xops);
24425 x86_file_start (void)
24427 default_file_start ();
24429 darwin_file_start ();
24431 if (X86_FILE_START_VERSION_DIRECTIVE)
24432 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
24433 if (X86_FILE_START_FLTUSED)
24434 fputs ("\t.global\t__fltused\n", asm_out_file);
24435 if (ix86_asm_dialect == ASM_INTEL)
24436 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
24440 x86_field_alignment (tree field, int computed)
24442 enum machine_mode mode;
24443 tree type = TREE_TYPE (field);
24445 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
24447 mode = TYPE_MODE (strip_array_types (type));
24448 if (mode == DFmode || mode == DCmode
24449 || GET_MODE_CLASS (mode) == MODE_INT
24450 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
24451 return MIN (32, computed);
24455 /* Output assembler code to FILE to increment profiler label # LABELNO
24456 for profiling a function entry. */
24458 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
24462 #ifndef NO_PROFILE_COUNTERS
24463 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
24466 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
24467 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
24469 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
24473 #ifndef NO_PROFILE_COUNTERS
24474 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
24475 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
24477 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
24481 #ifndef NO_PROFILE_COUNTERS
24482 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
24483 PROFILE_COUNT_REGISTER);
24485 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
24489 /* We don't have exact information about the insn sizes, but we may assume
24490 quite safely that we are informed about all 1 byte insns and memory
24491 address sizes. This is enough to eliminate unnecessary padding in
24495 min_insn_size (rtx insn)
24499 if (!INSN_P (insn) || !active_insn_p (insn))
24502 /* Discard alignments we've emit and jump instructions. */
24503 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
24504 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
24507 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
24508 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
24511 /* Important case - calls are always 5 bytes.
24512 It is common to have many calls in the row. */
24514 && symbolic_reference_mentioned_p (PATTERN (insn))
24515 && !SIBLING_CALL_P (insn))
24517 if (get_attr_length (insn) <= 1)
24520 /* For normal instructions we may rely on the sizes of addresses
24521 and the presence of symbol to require 4 bytes of encoding.
24522 This is not the case for jumps where references are PC relative. */
24523 if (!JUMP_P (insn))
24525 l = get_attr_length_address (insn);
24526 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
24535 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
24539 ix86_avoid_jump_misspredicts (void)
24541 rtx insn, start = get_insns ();
24542 int nbytes = 0, njumps = 0;
24545 /* Look for all minimal intervals of instructions containing 4 jumps.
24546 The intervals are bounded by START and INSN. NBYTES is the total
24547 size of instructions in the interval including INSN and not including
24548 START. When the NBYTES is smaller than 16 bytes, it is possible
24549 that the end of START and INSN ends up in the same 16byte page.
24551 The smallest offset in the page INSN can start is the case where START
24552 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
24553 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
24555 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24558 nbytes += min_insn_size (insn);
24560 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
24561 INSN_UID (insn), min_insn_size (insn));
24563 && GET_CODE (PATTERN (insn)) != ADDR_VEC
24564 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
24572 start = NEXT_INSN (start);
24573 if ((JUMP_P (start)
24574 && GET_CODE (PATTERN (start)) != ADDR_VEC
24575 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
24577 njumps--, isjump = 1;
24580 nbytes -= min_insn_size (start);
24582 gcc_assert (njumps >= 0);
24584 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
24585 INSN_UID (start), INSN_UID (insn), nbytes);
24587 if (njumps == 3 && isjump && nbytes < 16)
24589 int padsize = 15 - nbytes + min_insn_size (insn);
24592 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
24593 INSN_UID (insn), padsize);
24594 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
24599 /* AMD Athlon works faster
24600 when RET is not destination of conditional jump or directly preceded
24601 by other jump instruction. We avoid the penalty by inserting NOP just
24602 before the RET instructions in such cases. */
24604 ix86_pad_returns (void)
24609 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
24611 basic_block bb = e->src;
24612 rtx ret = BB_END (bb);
24614 bool replace = false;
24616 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
24617 || !maybe_hot_bb_p (bb))
24619 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
24620 if (active_insn_p (prev) || LABEL_P (prev))
24622 if (prev && LABEL_P (prev))
24627 FOR_EACH_EDGE (e, ei, bb->preds)
24628 if (EDGE_FREQUENCY (e) && e->src->index >= 0
24629 && !(e->flags & EDGE_FALLTHRU))
24634 prev = prev_active_insn (ret);
24636 && ((JUMP_P (prev) && any_condjump_p (prev))
24639 /* Empty functions get branch mispredict even when the jump destination
24640 is not visible to us. */
24641 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
24646 emit_insn_before (gen_return_internal_long (), ret);
24652 /* Implement machine specific optimizations. We implement padding of returns
24653 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
24657 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
24658 ix86_pad_returns ();
24659 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
24660 ix86_avoid_jump_misspredicts ();
24663 /* Return nonzero when QImode register that must be represented via REX prefix
24666 x86_extended_QIreg_mentioned_p (rtx insn)
24669 extract_insn_cached (insn);
24670 for (i = 0; i < recog_data.n_operands; i++)
24671 if (REG_P (recog_data.operand[i])
24672 && REGNO (recog_data.operand[i]) >= 4)
24677 /* Return nonzero when P points to register encoded via REX prefix.
24678 Called via for_each_rtx. */
24680 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
24682 unsigned int regno;
24685 regno = REGNO (*p);
24686 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
24689 /* Return true when INSN mentions register that must be encoded using REX
24692 x86_extended_reg_mentioned_p (rtx insn)
24694 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
24697 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
24698 optabs would emit if we didn't have TFmode patterns. */
24701 x86_emit_floatuns (rtx operands[2])
24703 rtx neglab, donelab, i0, i1, f0, in, out;
24704 enum machine_mode mode, inmode;
24706 inmode = GET_MODE (operands[1]);
24707 gcc_assert (inmode == SImode || inmode == DImode);
24710 in = force_reg (inmode, operands[1]);
24711 mode = GET_MODE (out);
24712 neglab = gen_label_rtx ();
24713 donelab = gen_label_rtx ();
24714 f0 = gen_reg_rtx (mode);
24716 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
24718 expand_float (out, in, 0);
24720 emit_jump_insn (gen_jump (donelab));
24723 emit_label (neglab);
24725 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
24727 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
24729 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
24731 expand_float (f0, i0, 0);
24733 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
24735 emit_label (donelab);
24738 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
24739 with all elements equal to VAR. Return true if successful. */
24742 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
24743 rtx target, rtx val)
24745 enum machine_mode smode, wsmode, wvmode;
24760 val = force_reg (GET_MODE_INNER (mode), val);
24761 x = gen_rtx_VEC_DUPLICATE (mode, val);
24762 emit_insn (gen_rtx_SET (VOIDmode, target, x));
24768 if (TARGET_SSE || TARGET_3DNOW_A)
24770 val = gen_lowpart (SImode, val);
24771 x = gen_rtx_TRUNCATE (HImode, val);
24772 x = gen_rtx_VEC_DUPLICATE (mode, x);
24773 emit_insn (gen_rtx_SET (VOIDmode, target, x));
24795 /* Extend HImode to SImode using a paradoxical SUBREG. */
24796 tmp1 = gen_reg_rtx (SImode);
24797 emit_move_insn (tmp1, gen_lowpart (SImode, val));
24798 /* Insert the SImode value as low element of V4SImode vector. */
24799 tmp2 = gen_reg_rtx (V4SImode);
24800 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
24801 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
24802 CONST0_RTX (V4SImode),
24804 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
24805 /* Cast the V4SImode vector back to a V8HImode vector. */
24806 tmp1 = gen_reg_rtx (V8HImode);
24807 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
24808 /* Duplicate the low short through the whole low SImode word. */
24809 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
24810 /* Cast the V8HImode vector back to a V4SImode vector. */
24811 tmp2 = gen_reg_rtx (V4SImode);
24812 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
24813 /* Replicate the low element of the V4SImode vector. */
24814 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
24815 /* Cast the V2SImode back to V8HImode, and store in target. */
24816 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
24827 /* Extend QImode to SImode using a paradoxical SUBREG. */
24828 tmp1 = gen_reg_rtx (SImode);
24829 emit_move_insn (tmp1, gen_lowpart (SImode, val));
24830 /* Insert the SImode value as low element of V4SImode vector. */
24831 tmp2 = gen_reg_rtx (V4SImode);
24832 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
24833 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
24834 CONST0_RTX (V4SImode),
24836 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
24837 /* Cast the V4SImode vector back to a V16QImode vector. */
24838 tmp1 = gen_reg_rtx (V16QImode);
24839 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
24840 /* Duplicate the low byte through the whole low SImode word. */
24841 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
24842 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
24843 /* Cast the V16QImode vector back to a V4SImode vector. */
24844 tmp2 = gen_reg_rtx (V4SImode);
24845 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
24846 /* Replicate the low element of the V4SImode vector. */
24847 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
24848 /* Cast the V2SImode back to V16QImode, and store in target. */
24849 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
24857 /* Replicate the value once into the next wider mode and recurse. */
24858 val = convert_modes (wsmode, smode, val, true);
24859 x = expand_simple_binop (wsmode, ASHIFT, val,
24860 GEN_INT (GET_MODE_BITSIZE (smode)),
24861 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24862 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
24864 x = gen_reg_rtx (wvmode);
24865 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
24866 gcc_unreachable ();
24867 emit_move_insn (target, gen_lowpart (mode, x));
24875 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
24876 whose ONE_VAR element is VAR, and other elements are zero. Return true
24880 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
24881 rtx target, rtx var, int one_var)
24883 enum machine_mode vsimode;
24886 bool use_vector_set = false;
24891 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
24896 use_vector_set = TARGET_SSE4_1;
24899 use_vector_set = TARGET_SSE2;
24902 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
24908 if (use_vector_set)
24910 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
24911 var = force_reg (GET_MODE_INNER (mode), var);
24912 ix86_expand_vector_set (mmx_ok, target, var, one_var);
24928 var = force_reg (GET_MODE_INNER (mode), var);
24929 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
24930 emit_insn (gen_rtx_SET (VOIDmode, target, x));
24935 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
24936 new_target = gen_reg_rtx (mode);
24938 new_target = target;
24939 var = force_reg (GET_MODE_INNER (mode), var);
24940 x = gen_rtx_VEC_DUPLICATE (mode, var);
24941 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
24942 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
24945 /* We need to shuffle the value to the correct position, so
24946 create a new pseudo to store the intermediate result. */
24948 /* With SSE2, we can use the integer shuffle insns. */
24949 if (mode != V4SFmode && TARGET_SSE2)
24951 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
24953 GEN_INT (one_var == 1 ? 0 : 1),
24954 GEN_INT (one_var == 2 ? 0 : 1),
24955 GEN_INT (one_var == 3 ? 0 : 1)));
24956 if (target != new_target)
24957 emit_move_insn (target, new_target);
24961 /* Otherwise convert the intermediate result to V4SFmode and
24962 use the SSE1 shuffle instructions. */
24963 if (mode != V4SFmode)
24965 tmp = gen_reg_rtx (V4SFmode);
24966 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
24971 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
24973 GEN_INT (one_var == 1 ? 0 : 1),
24974 GEN_INT (one_var == 2 ? 0+4 : 1+4),
24975 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
24977 if (mode != V4SFmode)
24978 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
24979 else if (tmp != target)
24980 emit_move_insn (target, tmp);
24982 else if (target != new_target)
24983 emit_move_insn (target, new_target);
24988 vsimode = V4SImode;
24994 vsimode = V2SImode;
25000 /* Zero extend the variable element to SImode and recurse. */
25001 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
25003 x = gen_reg_rtx (vsimode);
25004 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
25006 gcc_unreachable ();
25008 emit_move_insn (target, gen_lowpart (mode, x));
25016 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
25017 consisting of the values in VALS. It is known that all elements
25018 except ONE_VAR are constants. Return true if successful. */
25021 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
25022 rtx target, rtx vals, int one_var)
25024 rtx var = XVECEXP (vals, 0, one_var);
25025 enum machine_mode wmode;
25028 const_vec = copy_rtx (vals);
25029 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
25030 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
25038 /* For the two element vectors, it's just as easy to use
25039 the general case. */
25057 /* There's no way to set one QImode entry easily. Combine
25058 the variable value with its adjacent constant value, and
25059 promote to an HImode set. */
25060 x = XVECEXP (vals, 0, one_var ^ 1);
25063 var = convert_modes (HImode, QImode, var, true);
25064 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
25065 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25066 x = GEN_INT (INTVAL (x) & 0xff);
25070 var = convert_modes (HImode, QImode, var, true);
25071 x = gen_int_mode (INTVAL (x) << 8, HImode);
25073 if (x != const0_rtx)
25074 var = expand_simple_binop (HImode, IOR, var, x, var,
25075 1, OPTAB_LIB_WIDEN);
25077 x = gen_reg_rtx (wmode);
25078 emit_move_insn (x, gen_lowpart (wmode, const_vec));
25079 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
25081 emit_move_insn (target, gen_lowpart (mode, x));
25088 emit_move_insn (target, const_vec);
25089 ix86_expand_vector_set (mmx_ok, target, var, one_var);
25093 /* A subroutine of ix86_expand_vector_init_general. Use vector
25094 concatenate to handle the most general case: all values variable,
25095 and none identical. */
25098 ix86_expand_vector_init_concat (enum machine_mode mode,
25099 rtx target, rtx *ops, int n)
25101 enum machine_mode cmode, hmode = VOIDmode;
25102 rtx first[4], second[2];
25130 gcc_unreachable ();
25133 if (!register_operand (ops[1], cmode))
25134 ops[1] = force_reg (cmode, ops[1]);
25135 if (!register_operand (ops[0], cmode))
25136 ops[0] = force_reg (cmode, ops[0]);
25137 emit_insn (gen_rtx_SET (VOIDmode, target,
25138 gen_rtx_VEC_CONCAT (mode, ops[0],
25152 gcc_unreachable ();
25157 /* FIXME: We process inputs backward to help RA. PR 36222. */
25160 for (; i > 0; i -= 2, j--)
25162 first[j] = gen_reg_rtx (cmode);
25163 v = gen_rtvec (2, ops[i - 1], ops[i]);
25164 ix86_expand_vector_init (false, first[j],
25165 gen_rtx_PARALLEL (cmode, v));
25171 gcc_assert (hmode != VOIDmode);
25172 for (i = j = 0; i < n; i += 2, j++)
25174 second[j] = gen_reg_rtx (hmode);
25175 ix86_expand_vector_init_concat (hmode, second [j],
25179 ix86_expand_vector_init_concat (mode, target, second, n);
25182 ix86_expand_vector_init_concat (mode, target, first, n);
25186 gcc_unreachable ();
25190 /* A subroutine of ix86_expand_vector_init_general. Use vector
25191 interleave to handle the most general case: all values variable,
25192 and none identical. */
25195 ix86_expand_vector_init_interleave (enum machine_mode mode,
25196 rtx target, rtx *ops, int n)
25198 enum machine_mode first_imode, second_imode, third_imode;
25201 rtx (*gen_load_even) (rtx, rtx, rtx);
25202 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
25203 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
25208 gen_load_even = gen_vec_setv8hi;
25209 gen_interleave_first_low = gen_vec_interleave_lowv4si;
25210 gen_interleave_second_low = gen_vec_interleave_lowv2di;
25211 first_imode = V4SImode;
25212 second_imode = V2DImode;
25213 third_imode = VOIDmode;
25216 gen_load_even = gen_vec_setv16qi;
25217 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
25218 gen_interleave_second_low = gen_vec_interleave_lowv4si;
25219 first_imode = V8HImode;
25220 second_imode = V4SImode;
25221 third_imode = V2DImode;
25224 gcc_unreachable ();
25227 for (i = 0; i < n; i++)
25229 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
25230 op0 = gen_reg_rtx (SImode);
25231 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
25233 /* Insert the SImode value as low element of V4SImode vector. */
25234 op1 = gen_reg_rtx (V4SImode);
25235 op0 = gen_rtx_VEC_MERGE (V4SImode,
25236 gen_rtx_VEC_DUPLICATE (V4SImode,
25238 CONST0_RTX (V4SImode),
25240 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
25242 /* Cast the V4SImode vector back to a vector in orignal mode. */
25243 op0 = gen_reg_rtx (mode);
25244 emit_move_insn (op0, gen_lowpart (mode, op1));
25246 /* Load even elements into the second positon. */
25247 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
25250 /* Cast vector to FIRST_IMODE vector. */
25251 ops[i] = gen_reg_rtx (first_imode);
25252 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
25255 /* Interleave low FIRST_IMODE vectors. */
25256 for (i = j = 0; i < n; i += 2, j++)
25258 op0 = gen_reg_rtx (first_imode);
25259 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
25261 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
25262 ops[j] = gen_reg_rtx (second_imode);
25263 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
25266 /* Interleave low SECOND_IMODE vectors. */
25267 switch (second_imode)
25270 for (i = j = 0; i < n / 2; i += 2, j++)
25272 op0 = gen_reg_rtx (second_imode);
25273 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
25276 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
25278 ops[j] = gen_reg_rtx (third_imode);
25279 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
25281 second_imode = V2DImode;
25282 gen_interleave_second_low = gen_vec_interleave_lowv2di;
25286 op0 = gen_reg_rtx (second_imode);
25287 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
25290 /* Cast the SECOND_IMODE vector back to a vector on original
25292 emit_insn (gen_rtx_SET (VOIDmode, target,
25293 gen_lowpart (mode, op0)));
25297 gcc_unreachable ();
25301 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
25302 all values variable, and none identical. */
25305 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
25306 rtx target, rtx vals)
25315 if (!mmx_ok && !TARGET_SSE)
25323 n = GET_MODE_NUNITS (mode);
25324 for (i = 0; i < n; i++)
25325 ops[i] = XVECEXP (vals, 0, i);
25326 ix86_expand_vector_init_concat (mode, target, ops, n);
25330 if (!TARGET_SSE4_1)
25338 n = GET_MODE_NUNITS (mode);
25339 for (i = 0; i < n; i++)
25340 ops[i] = XVECEXP (vals, 0, i);
25341 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
25349 gcc_unreachable ();
25353 int i, j, n_elts, n_words, n_elt_per_word;
25354 enum machine_mode inner_mode;
25355 rtx words[4], shift;
25357 inner_mode = GET_MODE_INNER (mode);
25358 n_elts = GET_MODE_NUNITS (mode);
25359 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
25360 n_elt_per_word = n_elts / n_words;
25361 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
25363 for (i = 0; i < n_words; ++i)
25365 rtx word = NULL_RTX;
25367 for (j = 0; j < n_elt_per_word; ++j)
25369 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
25370 elt = convert_modes (word_mode, inner_mode, elt, true);
25376 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
25377 word, 1, OPTAB_LIB_WIDEN);
25378 word = expand_simple_binop (word_mode, IOR, word, elt,
25379 word, 1, OPTAB_LIB_WIDEN);
25387 emit_move_insn (target, gen_lowpart (mode, words[0]));
25388 else if (n_words == 2)
25390 rtx tmp = gen_reg_rtx (mode);
25391 emit_clobber (tmp);
25392 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
25393 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
25394 emit_move_insn (target, tmp);
25396 else if (n_words == 4)
25398 rtx tmp = gen_reg_rtx (V4SImode);
25399 gcc_assert (word_mode == SImode);
25400 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
25401 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
25402 emit_move_insn (target, gen_lowpart (mode, tmp));
25405 gcc_unreachable ();
25409 /* Initialize vector TARGET via VALS. Suppress the use of MMX
25410 instructions unless MMX_OK is true. */
25413 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
25415 enum machine_mode mode = GET_MODE (target);
25416 enum machine_mode inner_mode = GET_MODE_INNER (mode);
25417 int n_elts = GET_MODE_NUNITS (mode);
25418 int n_var = 0, one_var = -1;
25419 bool all_same = true, all_const_zero = true;
25423 for (i = 0; i < n_elts; ++i)
25425 x = XVECEXP (vals, 0, i);
25426 if (!(CONST_INT_P (x)
25427 || GET_CODE (x) == CONST_DOUBLE
25428 || GET_CODE (x) == CONST_FIXED))
25429 n_var++, one_var = i;
25430 else if (x != CONST0_RTX (inner_mode))
25431 all_const_zero = false;
25432 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
25436 /* Constants are best loaded from the constant pool. */
25439 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
25443 /* If all values are identical, broadcast the value. */
25445 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
25446 XVECEXP (vals, 0, 0)))
25449 /* Values where only one field is non-constant are best loaded from
25450 the pool and overwritten via move later. */
25454 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
25455 XVECEXP (vals, 0, one_var),
25459 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
25463 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
25467 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
25469 enum machine_mode mode = GET_MODE (target);
25470 enum machine_mode inner_mode = GET_MODE_INNER (mode);
25471 bool use_vec_merge = false;
25480 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
25481 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
25483 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
25485 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
25486 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25492 use_vec_merge = TARGET_SSE4_1;
25500 /* For the two element vectors, we implement a VEC_CONCAT with
25501 the extraction of the other element. */
25503 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
25504 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
25507 op0 = val, op1 = tmp;
25509 op0 = tmp, op1 = val;
25511 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
25512 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25517 use_vec_merge = TARGET_SSE4_1;
25524 use_vec_merge = true;
25528 /* tmp = target = A B C D */
25529 tmp = copy_to_reg (target);
25530 /* target = A A B B */
25531 emit_insn (gen_sse_unpcklps (target, target, target));
25532 /* target = X A B B */
25533 ix86_expand_vector_set (false, target, val, 0);
25534 /* target = A X C D */
25535 emit_insn (gen_sse_shufps_1 (target, target, tmp,
25536 GEN_INT (1), GEN_INT (0),
25537 GEN_INT (2+4), GEN_INT (3+4)));
25541 /* tmp = target = A B C D */
25542 tmp = copy_to_reg (target);
25543 /* tmp = X B C D */
25544 ix86_expand_vector_set (false, tmp, val, 0);
25545 /* target = A B X D */
25546 emit_insn (gen_sse_shufps_1 (target, target, tmp,
25547 GEN_INT (0), GEN_INT (1),
25548 GEN_INT (0+4), GEN_INT (3+4)));
25552 /* tmp = target = A B C D */
25553 tmp = copy_to_reg (target);
25554 /* tmp = X B C D */
25555 ix86_expand_vector_set (false, tmp, val, 0);
25556 /* target = A B X D */
25557 emit_insn (gen_sse_shufps_1 (target, target, tmp,
25558 GEN_INT (0), GEN_INT (1),
25559 GEN_INT (2+4), GEN_INT (0+4)));
25563 gcc_unreachable ();
25568 use_vec_merge = TARGET_SSE4_1;
25572 /* Element 0 handled by vec_merge below. */
25575 use_vec_merge = true;
25581 /* With SSE2, use integer shuffles to swap element 0 and ELT,
25582 store into element 0, then shuffle them back. */
25586 order[0] = GEN_INT (elt);
25587 order[1] = const1_rtx;
25588 order[2] = const2_rtx;
25589 order[3] = GEN_INT (3);
25590 order[elt] = const0_rtx;
25592 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
25593 order[1], order[2], order[3]));
25595 ix86_expand_vector_set (false, target, val, 0);
25597 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
25598 order[1], order[2], order[3]));
25602 /* For SSE1, we have to reuse the V4SF code. */
25603 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
25604 gen_lowpart (SFmode, val), elt);
25609 use_vec_merge = TARGET_SSE2;
25612 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
25616 use_vec_merge = TARGET_SSE4_1;
25626 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
25627 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
25628 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25632 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
25634 emit_move_insn (mem, target);
25636 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
25637 emit_move_insn (tmp, val);
25639 emit_move_insn (target, mem);
25644 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
25646 enum machine_mode mode = GET_MODE (vec);
25647 enum machine_mode inner_mode = GET_MODE_INNER (mode);
25648 bool use_vec_extr = false;
25661 use_vec_extr = true;
25665 use_vec_extr = TARGET_SSE4_1;
25677 tmp = gen_reg_rtx (mode);
25678 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
25679 GEN_INT (elt), GEN_INT (elt),
25680 GEN_INT (elt+4), GEN_INT (elt+4)));
25684 tmp = gen_reg_rtx (mode);
25685 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
25689 gcc_unreachable ();
25692 use_vec_extr = true;
25697 use_vec_extr = TARGET_SSE4_1;
25711 tmp = gen_reg_rtx (mode);
25712 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
25713 GEN_INT (elt), GEN_INT (elt),
25714 GEN_INT (elt), GEN_INT (elt)));
25718 tmp = gen_reg_rtx (mode);
25719 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
25723 gcc_unreachable ();
25726 use_vec_extr = true;
25731 /* For SSE1, we have to reuse the V4SF code. */
25732 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
25733 gen_lowpart (V4SFmode, vec), elt);
25739 use_vec_extr = TARGET_SSE2;
25742 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
25746 use_vec_extr = TARGET_SSE4_1;
25750 /* ??? Could extract the appropriate HImode element and shift. */
25757 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
25758 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
25760 /* Let the rtl optimizers know about the zero extension performed. */
25761 if (inner_mode == QImode || inner_mode == HImode)
25763 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
25764 target = gen_lowpart (SImode, target);
25767 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25771 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
25773 emit_move_insn (mem, vec);
25775 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
25776 emit_move_insn (target, tmp);
25780 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
25781 pattern to reduce; DEST is the destination; IN is the input vector. */
25784 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
25786 rtx tmp1, tmp2, tmp3;
25788 tmp1 = gen_reg_rtx (V4SFmode);
25789 tmp2 = gen_reg_rtx (V4SFmode);
25790 tmp3 = gen_reg_rtx (V4SFmode);
25792 emit_insn (gen_sse_movhlps (tmp1, in, in));
25793 emit_insn (fn (tmp2, tmp1, in));
25795 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
25796 GEN_INT (1), GEN_INT (1),
25797 GEN_INT (1+4), GEN_INT (1+4)));
25798 emit_insn (fn (dest, tmp2, tmp3));
25801 /* Target hook for scalar_mode_supported_p. */
25803 ix86_scalar_mode_supported_p (enum machine_mode mode)
25805 if (DECIMAL_FLOAT_MODE_P (mode))
25807 else if (mode == TFmode)
25810 return default_scalar_mode_supported_p (mode);
25813 /* Implements target hook vector_mode_supported_p. */
25815 ix86_vector_mode_supported_p (enum machine_mode mode)
25817 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
25819 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
25821 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
25823 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
25828 /* Target hook for c_mode_for_suffix. */
25829 static enum machine_mode
25830 ix86_c_mode_for_suffix (char suffix)
25840 /* Worker function for TARGET_MD_ASM_CLOBBERS.
25842 We do this in the new i386 backend to maintain source compatibility
25843 with the old cc0-based compiler. */
25846 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
25847 tree inputs ATTRIBUTE_UNUSED,
25850 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
25852 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
25857 /* Implements target vector targetm.asm.encode_section_info. This
25858 is not used by netware. */
25860 static void ATTRIBUTE_UNUSED
25861 ix86_encode_section_info (tree decl, rtx rtl, int first)
25863 default_encode_section_info (decl, rtl, first);
25865 if (TREE_CODE (decl) == VAR_DECL
25866 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
25867 && ix86_in_large_data_p (decl))
25868 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
25871 /* Worker function for REVERSE_CONDITION. */
25874 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
25876 return (mode != CCFPmode && mode != CCFPUmode
25877 ? reverse_condition (code)
25878 : reverse_condition_maybe_unordered (code));
25881 /* Output code to perform an x87 FP register move, from OPERANDS[1]
25885 output_387_reg_move (rtx insn, rtx *operands)
25887 if (REG_P (operands[0]))
25889 if (REG_P (operands[1])
25890 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25892 if (REGNO (operands[0]) == FIRST_STACK_REG)
25893 return output_387_ffreep (operands, 0);
25894 return "fstp\t%y0";
25896 if (STACK_TOP_P (operands[0]))
25897 return "fld%z1\t%y1";
25900 else if (MEM_P (operands[0]))
25902 gcc_assert (REG_P (operands[1]));
25903 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25904 return "fstp%z0\t%y0";
25907 /* There is no non-popping store to memory for XFmode.
25908 So if we need one, follow the store with a load. */
25909 if (GET_MODE (operands[0]) == XFmode)
25910 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
25912 return "fst%z0\t%y0";
25919 /* Output code to perform a conditional jump to LABEL, if C2 flag in
25920 FP status register is set. */
25923 ix86_emit_fp_unordered_jump (rtx label)
25925 rtx reg = gen_reg_rtx (HImode);
25928 emit_insn (gen_x86_fnstsw_1 (reg));
25930 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
25932 emit_insn (gen_x86_sahf_1 (reg));
25934 temp = gen_rtx_REG (CCmode, FLAGS_REG);
25935 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
25939 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
25941 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25942 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
25945 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
25946 gen_rtx_LABEL_REF (VOIDmode, label),
25948 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
25950 emit_jump_insn (temp);
25951 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25954 /* Output code to perform a log1p XFmode calculation. */
25956 void ix86_emit_i387_log1p (rtx op0, rtx op1)
25958 rtx label1 = gen_label_rtx ();
25959 rtx label2 = gen_label_rtx ();
25961 rtx tmp = gen_reg_rtx (XFmode);
25962 rtx tmp2 = gen_reg_rtx (XFmode);
25964 emit_insn (gen_absxf2 (tmp, op1));
25965 emit_insn (gen_cmpxf (tmp,
25966 CONST_DOUBLE_FROM_REAL_VALUE (
25967 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
25969 emit_jump_insn (gen_bge (label1));
25971 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
25972 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
25973 emit_jump (label2);
25975 emit_label (label1);
25976 emit_move_insn (tmp, CONST1_RTX (XFmode));
25977 emit_insn (gen_addxf3 (tmp, op1, tmp));
25978 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
25979 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
25981 emit_label (label2);
25984 /* Output code to perform a Newton-Rhapson approximation of a single precision
25985 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
25987 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
25989 rtx x0, x1, e0, e1, two;
25991 x0 = gen_reg_rtx (mode);
25992 e0 = gen_reg_rtx (mode);
25993 e1 = gen_reg_rtx (mode);
25994 x1 = gen_reg_rtx (mode);
25996 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
25998 if (VECTOR_MODE_P (mode))
25999 two = ix86_build_const_vector (SFmode, true, two);
26001 two = force_reg (mode, two);
26003 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
26005 /* x0 = rcp(b) estimate */
26006 emit_insn (gen_rtx_SET (VOIDmode, x0,
26007 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
26010 emit_insn (gen_rtx_SET (VOIDmode, e0,
26011 gen_rtx_MULT (mode, x0, b)));
26013 emit_insn (gen_rtx_SET (VOIDmode, e1,
26014 gen_rtx_MINUS (mode, two, e0)));
26016 emit_insn (gen_rtx_SET (VOIDmode, x1,
26017 gen_rtx_MULT (mode, x0, e1)));
26019 emit_insn (gen_rtx_SET (VOIDmode, res,
26020 gen_rtx_MULT (mode, a, x1)));
26023 /* Output code to perform a Newton-Rhapson approximation of a
26024 single precision floating point [reciprocal] square root. */
26026 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
26029 rtx x0, e0, e1, e2, e3, mthree, mhalf;
26032 x0 = gen_reg_rtx (mode);
26033 e0 = gen_reg_rtx (mode);
26034 e1 = gen_reg_rtx (mode);
26035 e2 = gen_reg_rtx (mode);
26036 e3 = gen_reg_rtx (mode);
26038 real_from_integer (&r, VOIDmode, -3, -1, 0);
26039 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
26041 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
26042 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
26044 if (VECTOR_MODE_P (mode))
26046 mthree = ix86_build_const_vector (SFmode, true, mthree);
26047 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
26050 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
26051 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
26053 /* x0 = rsqrt(a) estimate */
26054 emit_insn (gen_rtx_SET (VOIDmode, x0,
26055 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
26058 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
26063 zero = gen_reg_rtx (mode);
26064 mask = gen_reg_rtx (mode);
26066 zero = force_reg (mode, CONST0_RTX(mode));
26067 emit_insn (gen_rtx_SET (VOIDmode, mask,
26068 gen_rtx_NE (mode, zero, a)));
26070 emit_insn (gen_rtx_SET (VOIDmode, x0,
26071 gen_rtx_AND (mode, x0, mask)));
26075 emit_insn (gen_rtx_SET (VOIDmode, e0,
26076 gen_rtx_MULT (mode, x0, a)));
26078 emit_insn (gen_rtx_SET (VOIDmode, e1,
26079 gen_rtx_MULT (mode, e0, x0)));
26082 mthree = force_reg (mode, mthree);
26083 emit_insn (gen_rtx_SET (VOIDmode, e2,
26084 gen_rtx_PLUS (mode, e1, mthree)));
26086 mhalf = force_reg (mode, mhalf);
26088 /* e3 = -.5 * x0 */
26089 emit_insn (gen_rtx_SET (VOIDmode, e3,
26090 gen_rtx_MULT (mode, x0, mhalf)));
26092 /* e3 = -.5 * e0 */
26093 emit_insn (gen_rtx_SET (VOIDmode, e3,
26094 gen_rtx_MULT (mode, e0, mhalf)));
26095 /* ret = e2 * e3 */
26096 emit_insn (gen_rtx_SET (VOIDmode, res,
26097 gen_rtx_MULT (mode, e2, e3)));
26100 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
26102 static void ATTRIBUTE_UNUSED
26103 i386_solaris_elf_named_section (const char *name, unsigned int flags,
26106 /* With Binutils 2.15, the "@unwind" marker must be specified on
26107 every occurrence of the ".eh_frame" section, not just the first
26110 && strcmp (name, ".eh_frame") == 0)
26112 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
26113 flags & SECTION_WRITE ? "aw" : "a");
26116 default_elf_asm_named_section (name, flags, decl);
26119 /* Return the mangling of TYPE if it is an extended fundamental type. */
26121 static const char *
26122 ix86_mangle_type (const_tree type)
26124 type = TYPE_MAIN_VARIANT (type);
26126 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
26127 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
26130 switch (TYPE_MODE (type))
26133 /* __float128 is "g". */
26136 /* "long double" or __float80 is "e". */
26143 /* For 32-bit code we can save PIC register setup by using
26144 __stack_chk_fail_local hidden function instead of calling
26145 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
26146 register, so it is better to call __stack_chk_fail directly. */
26149 ix86_stack_protect_fail (void)
26151 return TARGET_64BIT
26152 ? default_external_stack_protect_fail ()
26153 : default_hidden_stack_protect_fail ();
26156 /* Select a format to encode pointers in exception handling data. CODE
26157 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
26158 true if the symbol may be affected by dynamic relocations.
26160 ??? All x86 object file formats are capable of representing this.
26161 After all, the relocation needed is the same as for the call insn.
26162 Whether or not a particular assembler allows us to enter such, I
26163 guess we'll have to see. */
26165 asm_preferred_eh_data_format (int code, int global)
26169 int type = DW_EH_PE_sdata8;
26171 || ix86_cmodel == CM_SMALL_PIC
26172 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
26173 type = DW_EH_PE_sdata4;
26174 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
26176 if (ix86_cmodel == CM_SMALL
26177 || (ix86_cmodel == CM_MEDIUM && code))
26178 return DW_EH_PE_udata4;
26179 return DW_EH_PE_absptr;
26182 /* Expand copysign from SIGN to the positive value ABS_VALUE
26183 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
26186 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
26188 enum machine_mode mode = GET_MODE (sign);
26189 rtx sgn = gen_reg_rtx (mode);
26190 if (mask == NULL_RTX)
26192 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
26193 if (!VECTOR_MODE_P (mode))
26195 /* We need to generate a scalar mode mask in this case. */
26196 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
26197 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
26198 mask = gen_reg_rtx (mode);
26199 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
26203 mask = gen_rtx_NOT (mode, mask);
26204 emit_insn (gen_rtx_SET (VOIDmode, sgn,
26205 gen_rtx_AND (mode, mask, sign)));
26206 emit_insn (gen_rtx_SET (VOIDmode, result,
26207 gen_rtx_IOR (mode, abs_value, sgn)));
26210 /* Expand fabs (OP0) and return a new rtx that holds the result. The
26211 mask for masking out the sign-bit is stored in *SMASK, if that is
26214 ix86_expand_sse_fabs (rtx op0, rtx *smask)
26216 enum machine_mode mode = GET_MODE (op0);
26219 xa = gen_reg_rtx (mode);
26220 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
26221 if (!VECTOR_MODE_P (mode))
26223 /* We need to generate a scalar mode mask in this case. */
26224 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
26225 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
26226 mask = gen_reg_rtx (mode);
26227 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
26229 emit_insn (gen_rtx_SET (VOIDmode, xa,
26230 gen_rtx_AND (mode, op0, mask)));
26238 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
26239 swapping the operands if SWAP_OPERANDS is true. The expanded
26240 code is a forward jump to a newly created label in case the
26241 comparison is true. The generated label rtx is returned. */
26243 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
26244 bool swap_operands)
26255 label = gen_label_rtx ();
26256 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
26257 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26258 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
26259 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
26260 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
26261 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
26262 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
26263 JUMP_LABEL (tmp) = label;
26268 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
26269 using comparison code CODE. Operands are swapped for the comparison if
26270 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
26272 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
26273 bool swap_operands)
26275 enum machine_mode mode = GET_MODE (op0);
26276 rtx mask = gen_reg_rtx (mode);
26285 if (mode == DFmode)
26286 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
26287 gen_rtx_fmt_ee (code, mode, op0, op1)));
26289 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
26290 gen_rtx_fmt_ee (code, mode, op0, op1)));
26295 /* Generate and return a rtx of mode MODE for 2**n where n is the number
26296 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
26298 ix86_gen_TWO52 (enum machine_mode mode)
26300 REAL_VALUE_TYPE TWO52r;
26303 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
26304 TWO52 = const_double_from_real_value (TWO52r, mode);
26305 TWO52 = force_reg (mode, TWO52);
26310 /* Expand SSE sequence for computing lround from OP1 storing
26313 ix86_expand_lround (rtx op0, rtx op1)
26315 /* C code for the stuff we're doing below:
26316 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
26319 enum machine_mode mode = GET_MODE (op1);
26320 const struct real_format *fmt;
26321 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
26324 /* load nextafter (0.5, 0.0) */
26325 fmt = REAL_MODE_FORMAT (mode);
26326 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
26327 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
26329 /* adj = copysign (0.5, op1) */
26330 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
26331 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
26333 /* adj = op1 + adj */
26334 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
26336 /* op0 = (imode)adj */
26337 expand_fix (op0, adj, 0);
26340 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
26343 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
26345 /* C code for the stuff we're doing below (for do_floor):
26347 xi -= (double)xi > op1 ? 1 : 0;
26350 enum machine_mode fmode = GET_MODE (op1);
26351 enum machine_mode imode = GET_MODE (op0);
26352 rtx ireg, freg, label, tmp;
26354 /* reg = (long)op1 */
26355 ireg = gen_reg_rtx (imode);
26356 expand_fix (ireg, op1, 0);
26358 /* freg = (double)reg */
26359 freg = gen_reg_rtx (fmode);
26360 expand_float (freg, ireg, 0);
26362 /* ireg = (freg > op1) ? ireg - 1 : ireg */
26363 label = ix86_expand_sse_compare_and_jump (UNLE,
26364 freg, op1, !do_floor);
26365 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
26366 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
26367 emit_move_insn (ireg, tmp);
26369 emit_label (label);
26370 LABEL_NUSES (label) = 1;
26372 emit_move_insn (op0, ireg);
26375 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
26376 result in OPERAND0. */
26378 ix86_expand_rint (rtx operand0, rtx operand1)
26380 /* C code for the stuff we're doing below:
26381 xa = fabs (operand1);
26382 if (!isless (xa, 2**52))
26384 xa = xa + 2**52 - 2**52;
26385 return copysign (xa, operand1);
26387 enum machine_mode mode = GET_MODE (operand0);
26388 rtx res, xa, label, TWO52, mask;
26390 res = gen_reg_rtx (mode);
26391 emit_move_insn (res, operand1);
26393 /* xa = abs (operand1) */
26394 xa = ix86_expand_sse_fabs (res, &mask);
26396 /* if (!isless (xa, TWO52)) goto label; */
26397 TWO52 = ix86_gen_TWO52 (mode);
26398 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26400 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26401 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
26403 ix86_sse_copysign_to_positive (res, xa, res, mask);
26405 emit_label (label);
26406 LABEL_NUSES (label) = 1;
26408 emit_move_insn (operand0, res);
26411 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
26414 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
26416 /* C code for the stuff we expand below.
26417 double xa = fabs (x), x2;
26418 if (!isless (xa, TWO52))
26420 xa = xa + TWO52 - TWO52;
26421 x2 = copysign (xa, x);
26430 enum machine_mode mode = GET_MODE (operand0);
26431 rtx xa, TWO52, tmp, label, one, res, mask;
26433 TWO52 = ix86_gen_TWO52 (mode);
26435 /* Temporary for holding the result, initialized to the input
26436 operand to ease control flow. */
26437 res = gen_reg_rtx (mode);
26438 emit_move_insn (res, operand1);
26440 /* xa = abs (operand1) */
26441 xa = ix86_expand_sse_fabs (res, &mask);
26443 /* if (!isless (xa, TWO52)) goto label; */
26444 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26446 /* xa = xa + TWO52 - TWO52; */
26447 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26448 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
26450 /* xa = copysign (xa, operand1) */
26451 ix86_sse_copysign_to_positive (xa, xa, res, mask);
26453 /* generate 1.0 or -1.0 */
26454 one = force_reg (mode,
26455 const_double_from_real_value (do_floor
26456 ? dconst1 : dconstm1, mode));
26458 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
26459 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
26460 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26461 gen_rtx_AND (mode, one, tmp)));
26462 /* We always need to subtract here to preserve signed zero. */
26463 tmp = expand_simple_binop (mode, MINUS,
26464 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26465 emit_move_insn (res, tmp);
26467 emit_label (label);
26468 LABEL_NUSES (label) = 1;
26470 emit_move_insn (operand0, res);
26473 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
26476 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
26478 /* C code for the stuff we expand below.
26479 double xa = fabs (x), x2;
26480 if (!isless (xa, TWO52))
26482 x2 = (double)(long)x;
26489 if (HONOR_SIGNED_ZEROS (mode))
26490 return copysign (x2, x);
26493 enum machine_mode mode = GET_MODE (operand0);
26494 rtx xa, xi, TWO52, tmp, label, one, res, mask;
26496 TWO52 = ix86_gen_TWO52 (mode);
26498 /* Temporary for holding the result, initialized to the input
26499 operand to ease control flow. */
26500 res = gen_reg_rtx (mode);
26501 emit_move_insn (res, operand1);
26503 /* xa = abs (operand1) */
26504 xa = ix86_expand_sse_fabs (res, &mask);
26506 /* if (!isless (xa, TWO52)) goto label; */
26507 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26509 /* xa = (double)(long)x */
26510 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
26511 expand_fix (xi, res, 0);
26512 expand_float (xa, xi, 0);
26515 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
26517 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
26518 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
26519 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26520 gen_rtx_AND (mode, one, tmp)));
26521 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
26522 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26523 emit_move_insn (res, tmp);
26525 if (HONOR_SIGNED_ZEROS (mode))
26526 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
26528 emit_label (label);
26529 LABEL_NUSES (label) = 1;
26531 emit_move_insn (operand0, res);
26534 /* Expand SSE sequence for computing round from OPERAND1 storing
26535 into OPERAND0. Sequence that works without relying on DImode truncation
26536 via cvttsd2siq that is only available on 64bit targets. */
26538 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
26540 /* C code for the stuff we expand below.
26541 double xa = fabs (x), xa2, x2;
26542 if (!isless (xa, TWO52))
26544 Using the absolute value and copying back sign makes
26545 -0.0 -> -0.0 correct.
26546 xa2 = xa + TWO52 - TWO52;
26551 else if (dxa > 0.5)
26553 x2 = copysign (xa2, x);
26556 enum machine_mode mode = GET_MODE (operand0);
26557 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
26559 TWO52 = ix86_gen_TWO52 (mode);
26561 /* Temporary for holding the result, initialized to the input
26562 operand to ease control flow. */
26563 res = gen_reg_rtx (mode);
26564 emit_move_insn (res, operand1);
26566 /* xa = abs (operand1) */
26567 xa = ix86_expand_sse_fabs (res, &mask);
26569 /* if (!isless (xa, TWO52)) goto label; */
26570 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26572 /* xa2 = xa + TWO52 - TWO52; */
26573 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26574 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
26576 /* dxa = xa2 - xa; */
26577 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
26579 /* generate 0.5, 1.0 and -0.5 */
26580 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
26581 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
26582 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
26586 tmp = gen_reg_rtx (mode);
26587 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
26588 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
26589 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26590 gen_rtx_AND (mode, one, tmp)));
26591 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26592 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
26593 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
26594 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26595 gen_rtx_AND (mode, one, tmp)));
26596 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26598 /* res = copysign (xa2, operand1) */
26599 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
26601 emit_label (label);
26602 LABEL_NUSES (label) = 1;
26604 emit_move_insn (operand0, res);
26607 /* Expand SSE sequence for computing trunc from OPERAND1 storing
26610 ix86_expand_trunc (rtx operand0, rtx operand1)
26612 /* C code for SSE variant we expand below.
26613 double xa = fabs (x), x2;
26614 if (!isless (xa, TWO52))
26616 x2 = (double)(long)x;
26617 if (HONOR_SIGNED_ZEROS (mode))
26618 return copysign (x2, x);
26621 enum machine_mode mode = GET_MODE (operand0);
26622 rtx xa, xi, TWO52, label, res, mask;
26624 TWO52 = ix86_gen_TWO52 (mode);
26626 /* Temporary for holding the result, initialized to the input
26627 operand to ease control flow. */
26628 res = gen_reg_rtx (mode);
26629 emit_move_insn (res, operand1);
26631 /* xa = abs (operand1) */
26632 xa = ix86_expand_sse_fabs (res, &mask);
26634 /* if (!isless (xa, TWO52)) goto label; */
26635 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26637 /* x = (double)(long)x */
26638 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
26639 expand_fix (xi, res, 0);
26640 expand_float (res, xi, 0);
26642 if (HONOR_SIGNED_ZEROS (mode))
26643 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
26645 emit_label (label);
26646 LABEL_NUSES (label) = 1;
26648 emit_move_insn (operand0, res);
26651 /* Expand SSE sequence for computing trunc from OPERAND1 storing
26654 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
26656 enum machine_mode mode = GET_MODE (operand0);
26657 rtx xa, mask, TWO52, label, one, res, smask, tmp;
26659 /* C code for SSE variant we expand below.
26660 double xa = fabs (x), x2;
26661 if (!isless (xa, TWO52))
26663 xa2 = xa + TWO52 - TWO52;
26667 x2 = copysign (xa2, x);
26671 TWO52 = ix86_gen_TWO52 (mode);
26673 /* Temporary for holding the result, initialized to the input
26674 operand to ease control flow. */
26675 res = gen_reg_rtx (mode);
26676 emit_move_insn (res, operand1);
26678 /* xa = abs (operand1) */
26679 xa = ix86_expand_sse_fabs (res, &smask);
26681 /* if (!isless (xa, TWO52)) goto label; */
26682 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26684 /* res = xa + TWO52 - TWO52; */
26685 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26686 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
26687 emit_move_insn (res, tmp);
26690 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
26692 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
26693 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
26694 emit_insn (gen_rtx_SET (VOIDmode, mask,
26695 gen_rtx_AND (mode, mask, one)));
26696 tmp = expand_simple_binop (mode, MINUS,
26697 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
26698 emit_move_insn (res, tmp);
26700 /* res = copysign (res, operand1) */
26701 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
26703 emit_label (label);
26704 LABEL_NUSES (label) = 1;
26706 emit_move_insn (operand0, res);
26709 /* Expand SSE sequence for computing round from OPERAND1 storing
26712 ix86_expand_round (rtx operand0, rtx operand1)
26714 /* C code for the stuff we're doing below:
26715 double xa = fabs (x);
26716 if (!isless (xa, TWO52))
26718 xa = (double)(long)(xa + nextafter (0.5, 0.0));
26719 return copysign (xa, x);
26721 enum machine_mode mode = GET_MODE (operand0);
26722 rtx res, TWO52, xa, label, xi, half, mask;
26723 const struct real_format *fmt;
26724 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
26726 /* Temporary for holding the result, initialized to the input
26727 operand to ease control flow. */
26728 res = gen_reg_rtx (mode);
26729 emit_move_insn (res, operand1);
26731 TWO52 = ix86_gen_TWO52 (mode);
26732 xa = ix86_expand_sse_fabs (res, &mask);
26733 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26735 /* load nextafter (0.5, 0.0) */
26736 fmt = REAL_MODE_FORMAT (mode);
26737 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
26738 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
26740 /* xa = xa + 0.5 */
26741 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
26742 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
26744 /* xa = (double)(int64_t)xa */
26745 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
26746 expand_fix (xi, xa, 0);
26747 expand_float (xa, xi, 0);
26749 /* res = copysign (xa, operand1) */
26750 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
26752 emit_label (label);
26753 LABEL_NUSES (label) = 1;
26755 emit_move_insn (operand0, res);
26759 /* Validate whether a SSE5 instruction is valid or not.
26760 OPERANDS is the array of operands.
26761 NUM is the number of operands.
26762 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
26763 NUM_MEMORY is the maximum number of memory operands to accept.
26764 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
26767 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
26768 bool uses_oc0, int num_memory, bool commutative)
26774 /* Count the number of memory arguments */
26777 for (i = 0; i < num; i++)
26779 enum machine_mode mode = GET_MODE (operands[i]);
26780 if (register_operand (operands[i], mode))
26783 else if (memory_operand (operands[i], mode))
26785 mem_mask |= (1 << i);
26791 rtx pattern = PATTERN (insn);
26793 /* allow 0 for pcmov */
26794 if (GET_CODE (pattern) != SET
26795 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
26797 || operands[i] != CONST0_RTX (mode))
26802 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
26803 a memory operation. */
26804 if (num_memory < 0)
26806 num_memory = -num_memory;
26807 if ((mem_mask & (1 << (num-1))) != 0)
26809 mem_mask &= ~(1 << (num-1));
26814 /* If there were no memory operations, allow the insn */
26818 /* Do not allow the destination register to be a memory operand. */
26819 else if (mem_mask & (1 << 0))
26822 /* If there are too many memory operations, disallow the instruction. While
26823 the hardware only allows 1 memory reference, before register allocation
26824 for some insns, we allow two memory operations sometimes in order to allow
26825 code like the following to be optimized:
26827 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
26829 or similar cases that are vectorized into using the fmaddss
26831 else if (mem_count > num_memory)
26834 /* Don't allow more than one memory operation if not optimizing. */
26835 else if (mem_count > 1 && !optimize)
26838 else if (num == 4 && mem_count == 1)
26840 /* formats (destination is the first argument), example fmaddss:
26841 xmm1, xmm1, xmm2, xmm3/mem
26842 xmm1, xmm1, xmm2/mem, xmm3
26843 xmm1, xmm2, xmm3/mem, xmm1
26844 xmm1, xmm2/mem, xmm3, xmm1 */
26846 return ((mem_mask == (1 << 1))
26847 || (mem_mask == (1 << 2))
26848 || (mem_mask == (1 << 3)));
26850 /* format, example pmacsdd:
26851 xmm1, xmm2, xmm3/mem, xmm1 */
26853 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
26855 return (mem_mask == (1 << 2));
26858 else if (num == 4 && num_memory == 2)
26860 /* If there are two memory operations, we can load one of the memory ops
26861 into the destination register. This is for optimizing the
26862 multiply/add ops, which the combiner has optimized both the multiply
26863 and the add insns to have a memory operation. We have to be careful
26864 that the destination doesn't overlap with the inputs. */
26865 rtx op0 = operands[0];
26867 if (reg_mentioned_p (op0, operands[1])
26868 || reg_mentioned_p (op0, operands[2])
26869 || reg_mentioned_p (op0, operands[3]))
26872 /* formats (destination is the first argument), example fmaddss:
26873 xmm1, xmm1, xmm2, xmm3/mem
26874 xmm1, xmm1, xmm2/mem, xmm3
26875 xmm1, xmm2, xmm3/mem, xmm1
26876 xmm1, xmm2/mem, xmm3, xmm1
26878 For the oc0 case, we will load either operands[1] or operands[3] into
26879 operands[0], so any combination of 2 memory operands is ok. */
26883 /* format, example pmacsdd:
26884 xmm1, xmm2, xmm3/mem, xmm1
26886 For the integer multiply/add instructions be more restrictive and
26887 require operands[2] and operands[3] to be the memory operands. */
26889 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
26891 return (mem_mask == ((1 << 2) | (1 << 3)));
26894 else if (num == 3 && num_memory == 1)
26896 /* formats, example protb:
26897 xmm1, xmm2, xmm3/mem
26898 xmm1, xmm2/mem, xmm3 */
26900 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
26902 /* format, example comeq:
26903 xmm1, xmm2, xmm3/mem */
26905 return (mem_mask == (1 << 2));
26909 gcc_unreachable ();
26915 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
26916 hardware will allow by using the destination register to load one of the
26917 memory operations. Presently this is used by the multiply/add routines to
26918 allow 2 memory references. */
26921 ix86_expand_sse5_multiple_memory (rtx operands[],
26923 enum machine_mode mode)
26925 rtx op0 = operands[0];
26927 || memory_operand (op0, mode)
26928 || reg_mentioned_p (op0, operands[1])
26929 || reg_mentioned_p (op0, operands[2])
26930 || reg_mentioned_p (op0, operands[3]))
26931 gcc_unreachable ();
26933 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
26934 the destination register. */
26935 if (memory_operand (operands[1], mode))
26937 emit_move_insn (op0, operands[1]);
26940 else if (memory_operand (operands[3], mode))
26942 emit_move_insn (op0, operands[3]);
26946 gcc_unreachable ();
26952 /* Table of valid machine attributes. */
26953 static const struct attribute_spec ix86_attribute_table[] =
26955 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
26956 /* Stdcall attribute says callee is responsible for popping arguments
26957 if they are not variable. */
26958 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26959 /* Fastcall attribute says callee is responsible for popping arguments
26960 if they are not variable. */
26961 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26962 /* Cdecl attribute says the callee is a normal C declaration */
26963 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26964 /* Regparm attribute specifies how many integer arguments are to be
26965 passed in registers. */
26966 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
26967 /* Sseregparm attribute says we are using x86_64 calling conventions
26968 for FP arguments. */
26969 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26970 /* force_align_arg_pointer says this function realigns the stack at entry. */
26971 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
26972 false, true, true, ix86_handle_cconv_attribute },
26973 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26974 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
26975 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
26976 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
26978 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
26979 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
26980 #ifdef SUBTARGET_ATTRIBUTE_TABLE
26981 SUBTARGET_ATTRIBUTE_TABLE,
26983 /* ms_abi and sysv_abi calling convention function attributes. */
26984 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
26985 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
26987 { NULL, 0, 0, false, false, false, NULL }
26990 /* Implement targetm.vectorize.builtin_vectorization_cost. */
26992 x86_builtin_vectorization_cost (bool runtime_test)
26994 /* If the branch of the runtime test is taken - i.e. - the vectorized
26995 version is skipped - this incurs a misprediction cost (because the
26996 vectorized version is expected to be the fall-through). So we subtract
26997 the latency of a mispredicted branch from the costs that are incured
26998 when the vectorized version is executed.
27000 TODO: The values in individual target tables have to be tuned or new
27001 fields may be needed. For eg. on K8, the default branch path is the
27002 not-taken path. If the taken path is predicted correctly, the minimum
27003 penalty of going down the taken-path is 1 cycle. If the taken-path is
27004 not predicted correctly, then the minimum penalty is 10 cycles. */
27008 return (-(ix86_cost->cond_taken_branch_cost));
27014 /* This function returns the calling abi specific va_list type node.
27015 It returns the FNDECL specific va_list type. */
27018 ix86_fn_abi_va_list (tree fndecl)
27023 return va_list_type_node;
27024 gcc_assert (fndecl != NULL_TREE);
27025 abi = ix86_function_abi ((const_tree) fndecl);
27028 return ms_va_list_type_node;
27030 return sysv_va_list_type_node;
27033 /* Returns the canonical va_list type specified by TYPE. If there
27034 is no valid TYPE provided, it return NULL_TREE. */
27037 ix86_canonical_va_list_type (tree type)
27041 /* Resolve references and pointers to va_list type. */
27042 if (INDIRECT_REF_P (type))
27043 type = TREE_TYPE (type);
27044 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
27045 type = TREE_TYPE (type);
27049 wtype = va_list_type_node;
27050 gcc_assert (wtype != NULL_TREE);
27052 if (TREE_CODE (wtype) == ARRAY_TYPE)
27054 /* If va_list is an array type, the argument may have decayed
27055 to a pointer type, e.g. by being passed to another function.
27056 In that case, unwrap both types so that we can compare the
27057 underlying records. */
27058 if (TREE_CODE (htype) == ARRAY_TYPE
27059 || POINTER_TYPE_P (htype))
27061 wtype = TREE_TYPE (wtype);
27062 htype = TREE_TYPE (htype);
27065 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
27066 return va_list_type_node;
27067 wtype = sysv_va_list_type_node;
27068 gcc_assert (wtype != NULL_TREE);
27070 if (TREE_CODE (wtype) == ARRAY_TYPE)
27072 /* If va_list is an array type, the argument may have decayed
27073 to a pointer type, e.g. by being passed to another function.
27074 In that case, unwrap both types so that we can compare the
27075 underlying records. */
27076 if (TREE_CODE (htype) == ARRAY_TYPE
27077 || POINTER_TYPE_P (htype))
27079 wtype = TREE_TYPE (wtype);
27080 htype = TREE_TYPE (htype);
27083 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
27084 return sysv_va_list_type_node;
27085 wtype = ms_va_list_type_node;
27086 gcc_assert (wtype != NULL_TREE);
27088 if (TREE_CODE (wtype) == ARRAY_TYPE)
27090 /* If va_list is an array type, the argument may have decayed
27091 to a pointer type, e.g. by being passed to another function.
27092 In that case, unwrap both types so that we can compare the
27093 underlying records. */
27094 if (TREE_CODE (htype) == ARRAY_TYPE
27095 || POINTER_TYPE_P (htype))
27097 wtype = TREE_TYPE (wtype);
27098 htype = TREE_TYPE (htype);
27101 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
27102 return ms_va_list_type_node;
27105 return std_canonical_va_list_type (type);
27108 /* Iterate through the target-specific builtin types for va_list.
27109 IDX denotes the iterator, *PTREE is set to the result type of
27110 the va_list builtin, and *PNAME to its internal type.
27111 Returns zero if there is no element for this index, otherwise
27112 IDX should be increased upon the next call.
27113 Note, do not iterate a base builtin's name like __builtin_va_list.
27114 Used from c_common_nodes_and_builtins. */
27117 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
27123 *ptree = ms_va_list_type_node;
27124 *pname = "__builtin_ms_va_list";
27127 *ptree = sysv_va_list_type_node;
27128 *pname = "__builtin_sysv_va_list";
27136 /* Initialize the GCC target structure. */
27137 #undef TARGET_RETURN_IN_MEMORY
27138 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
27140 #undef TARGET_ATTRIBUTE_TABLE
27141 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
27142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
27143 # undef TARGET_MERGE_DECL_ATTRIBUTES
27144 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
27147 #undef TARGET_COMP_TYPE_ATTRIBUTES
27148 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
27150 #undef TARGET_INIT_BUILTINS
27151 #define TARGET_INIT_BUILTINS ix86_init_builtins
27152 #undef TARGET_EXPAND_BUILTIN
27153 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
27155 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
27156 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
27157 ix86_builtin_vectorized_function
27159 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
27160 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
27162 #undef TARGET_BUILTIN_RECIPROCAL
27163 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
27165 #undef TARGET_ASM_FUNCTION_EPILOGUE
27166 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
27168 #undef TARGET_ENCODE_SECTION_INFO
27169 #ifndef SUBTARGET_ENCODE_SECTION_INFO
27170 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
27172 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
27175 #undef TARGET_ASM_OPEN_PAREN
27176 #define TARGET_ASM_OPEN_PAREN ""
27177 #undef TARGET_ASM_CLOSE_PAREN
27178 #define TARGET_ASM_CLOSE_PAREN ""
27180 #undef TARGET_ASM_ALIGNED_HI_OP
27181 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
27182 #undef TARGET_ASM_ALIGNED_SI_OP
27183 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
27185 #undef TARGET_ASM_ALIGNED_DI_OP
27186 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
27189 #undef TARGET_ASM_UNALIGNED_HI_OP
27190 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
27191 #undef TARGET_ASM_UNALIGNED_SI_OP
27192 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
27193 #undef TARGET_ASM_UNALIGNED_DI_OP
27194 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
27196 #undef TARGET_SCHED_ADJUST_COST
27197 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
27198 #undef TARGET_SCHED_ISSUE_RATE
27199 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
27200 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
27201 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
27202 ia32_multipass_dfa_lookahead
27204 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
27205 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
27208 #undef TARGET_HAVE_TLS
27209 #define TARGET_HAVE_TLS true
27211 #undef TARGET_CANNOT_FORCE_CONST_MEM
27212 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
27213 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
27214 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
27216 #undef TARGET_DELEGITIMIZE_ADDRESS
27217 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
27219 #undef TARGET_MS_BITFIELD_LAYOUT_P
27220 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
27223 #undef TARGET_BINDS_LOCAL_P
27224 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
27226 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
27227 #undef TARGET_BINDS_LOCAL_P
27228 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
27231 #undef TARGET_ASM_OUTPUT_MI_THUNK
27232 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
27233 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
27234 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
27236 #undef TARGET_ASM_FILE_START
27237 #define TARGET_ASM_FILE_START x86_file_start
27239 #undef TARGET_DEFAULT_TARGET_FLAGS
27240 #define TARGET_DEFAULT_TARGET_FLAGS \
27242 | TARGET_SUBTARGET_DEFAULT \
27243 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
27245 #undef TARGET_HANDLE_OPTION
27246 #define TARGET_HANDLE_OPTION ix86_handle_option
27248 #undef TARGET_RTX_COSTS
27249 #define TARGET_RTX_COSTS ix86_rtx_costs
27250 #undef TARGET_ADDRESS_COST
27251 #define TARGET_ADDRESS_COST ix86_address_cost
27253 #undef TARGET_FIXED_CONDITION_CODE_REGS
27254 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
27255 #undef TARGET_CC_MODES_COMPATIBLE
27256 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
27258 #undef TARGET_MACHINE_DEPENDENT_REORG
27259 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
27261 #undef TARGET_BUILD_BUILTIN_VA_LIST
27262 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
27264 #undef TARGET_FN_ABI_VA_LIST
27265 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
27267 #undef TARGET_CANONICAL_VA_LIST_TYPE
27268 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
27270 #undef TARGET_EXPAND_BUILTIN_VA_START
27271 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
27273 #undef TARGET_MD_ASM_CLOBBERS
27274 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
27276 #undef TARGET_PROMOTE_PROTOTYPES
27277 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
27278 #undef TARGET_STRUCT_VALUE_RTX
27279 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
27280 #undef TARGET_SETUP_INCOMING_VARARGS
27281 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
27282 #undef TARGET_MUST_PASS_IN_STACK
27283 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
27284 #undef TARGET_PASS_BY_REFERENCE
27285 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
27286 #undef TARGET_INTERNAL_ARG_POINTER
27287 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
27288 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
27289 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
27290 #undef TARGET_STRICT_ARGUMENT_NAMING
27291 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
27293 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
27294 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
27296 #undef TARGET_SCALAR_MODE_SUPPORTED_P
27297 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
27299 #undef TARGET_VECTOR_MODE_SUPPORTED_P
27300 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
27302 #undef TARGET_C_MODE_FOR_SUFFIX
27303 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
27306 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
27307 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
27310 #ifdef SUBTARGET_INSERT_ATTRIBUTES
27311 #undef TARGET_INSERT_ATTRIBUTES
27312 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
27315 #undef TARGET_MANGLE_TYPE
27316 #define TARGET_MANGLE_TYPE ix86_mangle_type
27318 #undef TARGET_STACK_PROTECT_FAIL
27319 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
27321 #undef TARGET_FUNCTION_VALUE
27322 #define TARGET_FUNCTION_VALUE ix86_function_value
27324 #undef TARGET_SECONDARY_RELOAD
27325 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
27327 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
27328 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
27330 #undef TARGET_SET_CURRENT_FUNCTION
27331 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
27333 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
27334 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_option_attribute_p
27336 #undef TARGET_OPTION_SAVE
27337 #define TARGET_OPTION_SAVE ix86_function_specific_save
27339 #undef TARGET_OPTION_RESTORE
27340 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
27342 #undef TARGET_OPTION_PRINT
27343 #define TARGET_OPTION_PRINT ix86_function_specific_print
27345 #undef TARGET_OPTION_CAN_INLINE_P
27346 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
27348 struct gcc_target targetm = TARGET_INITIALIZER;
27350 #include "gt-i386.h"