1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
/* Forward declarations for static helpers defined later in this file.
   NOTE(review): their bodies are not visible in this chunk — the
   vectorization-cost hook presumably keys off its bool argument and
   legitimize_dllimport_symbol rewrites dllimport symbol refs; confirm
   against the definitions before relying on these descriptions.  */
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default for targets whose configuration headers do not define
   CHECK_STACK_LIMIT; -1 is the "no limit" sentinel.
   NOTE(review): the matching #endif was missing from the extracted text
   (the conditional was never closed); restored here.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The
   tables below have five slots — QI, HI, SI, DI and "other" — so any
   mode that is not one of the four integer modes maps to slot 4.
   NOTE(review): the final ": 4)" arm was missing from the extracted
   text (the conditional expression and its parenthesis were left
   unterminated, and the trailing backslash swallowed the next line);
   restored here.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
 : (mode) == HImode ? 1 \
 : (mode) == SImode ? 2 \
 : (mode) == DImode ? 3 \
 : 4)
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* Size-based cost unit: under the assumption above, COSTS_N_BYTES (N)
   makes an N-byte instruction directly comparable to the COSTS_N_INSNS
   scale used by the speed-tuned tables.  */
75 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy for cost-table slots that are unused
   (e.g. the 64-bit variant on 32-bit-only processors): always libcall.  */
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
149 /* Processor costs (relative to an add) */
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 more expensive startup time in CPU, but after 4K the difference is down in the noise.
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
706 100, /* number of parallel prefetches */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use loop. For large blocks, libcall can do
716 nontemporary accesses and beat inline considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
780 MOVD reg64, xmmreg Double FADD 3
782 MOVD reg32, xmmreg Double FADD 3
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
792 100, /* number of parallel prefetches */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use loop. For large blocks, libcall can
803 do nontemporary accesses and beat inline considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
/* Per-insn cost table used when tuning for the Pentium 4 (-mtune=pentium4).
   All COSTS_N_INSNS values are relative to an integer add.
   NOTE(review): this extract is missing original lines 842 (MOVE_RATIO),
   868 (branch cost), 878 (tail of the memset stringop entry) and the
   closing "};" -- restore from upstream i386.c before compiling.  */
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy then memset stringop strategy tables: {max byte count, algorithm}
   pairs, first row for 32-bit, second for 64-bit (dummy here).  */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
/* Per-insn cost table used when tuning for Nocona (64-bit Pentium 4 class).
   NOTE(review): this extract is missing original lines 913 (MOVE_RATIO),
   939 (branch cost), 950 (tail of the memset stringop entry) and the
   closing "};" -- restore from upstream i386.c before compiling.  */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy then memset stringop strategy tables, 32-bit row then 64-bit row.  */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
/* Per-insn cost table used when tuning for Core 2.
   NOTE(review): this extract is missing original line 986 (MOVE_RATIO)
   and the closing "};" -- restore from upstream i386.c before compiling.  */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy then memset stringop strategy tables, 32-bit row then 64-bit row.  */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
1039 /* Generic64 should produce code tuned for Nocona and K8. */
/* NOTE(review): the closing "};" of this initializer is missing from this
   extract -- restore from upstream i386.c before compiling.  */
1041 struct processor_costs generic64_cost = {
1042 COSTS_N_INSNS (1), /* cost of an add instruction */
1043 /* On all chips taken into consideration lea is 2 cycles and more. With
1044 this cost however our current implementation of synth_mult results in
1045 use of unnecessary temporary registers causing regression on several
1046 SPECfp benchmarks. */
1047 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1048 COSTS_N_INSNS (1), /* variable shift costs */
1049 COSTS_N_INSNS (1), /* constant shift costs */
1050 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1051 COSTS_N_INSNS (4), /* HI */
1052 COSTS_N_INSNS (3), /* SI */
1053 COSTS_N_INSNS (4), /* DI */
1054 COSTS_N_INSNS (2)}, /* other */
1055 0, /* cost of multiply per each bit set */
1056 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1057 COSTS_N_INSNS (26), /* HI */
1058 COSTS_N_INSNS (42), /* SI */
1059 COSTS_N_INSNS (74), /* DI */
1060 COSTS_N_INSNS (74)}, /* other */
1061 COSTS_N_INSNS (1), /* cost of movsx */
1062 COSTS_N_INSNS (1), /* cost of movzx */
1063 8, /* "large" insn */
1064 17, /* MOVE_RATIO */
1065 4, /* cost for loading QImode using movzbl */
1066 {4, 4, 4}, /* cost of loading integer registers
1067 in QImode, HImode and SImode.
1068 Relative to reg-reg move (2). */
1069 {4, 4, 4}, /* cost of storing integer registers */
1070 4, /* cost of reg,reg fld/fst */
1071 {12, 12, 12}, /* cost of loading fp registers
1072 in SFmode, DFmode and XFmode */
1073 {6, 6, 8}, /* cost of storing fp registers
1074 in SFmode, DFmode and XFmode */
1075 2, /* cost of moving MMX register */
1076 {8, 8}, /* cost of loading MMX registers
1077 in SImode and DImode */
1078 {8, 8}, /* cost of storing MMX registers
1079 in SImode and DImode */
1080 2, /* cost of moving SSE register */
1081 {8, 8, 8}, /* cost of loading SSE registers
1082 in SImode, DImode and TImode */
1083 {8, 8, 8}, /* cost of storing SSE registers
1084 in SImode, DImode and TImode */
1085 5, /* MMX or SSE register to integer */
1086 32, /* size of l1 cache. */
1087 512, /* size of l2 cache. */
1088 64, /* size of prefetch block */
1089 6, /* number of parallel prefetches */
1090 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1091 is increased to perhaps more appropriate value of 5. */
1092 3, /* Branch cost */
1093 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Stringop tables: the 32-bit rows are dummies since generic64 is 64-bit
   only; the 64-bit rows use rep prefix up to 8192 bytes, then libcall.  */
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 {DUMMY_STRINGOP_ALGS,
1102 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103 1, /* scalar_stmt_cost. */
1104 1, /* scalar load_cost. */
1105 1, /* scalar_store_cost. */
1106 1, /* vec_stmt_cost. */
1107 1, /* vec_to_scalar_cost. */
1108 1, /* scalar_to_vec_cost. */
1109 1, /* vec_align_load_cost. */
1110 2, /* vec_unalign_load_cost. */
1111 1, /* vec_store_cost. */
1112 3, /* cond_taken_branch_cost. */
1113 1, /* cond_not_taken_branch_cost. */
1116 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* NOTE(review): the closing "};" of this initializer is missing from this
   extract -- restore from upstream i386.c before compiling.  */
1118 struct processor_costs generic32_cost = {
1119 COSTS_N_INSNS (1), /* cost of an add instruction */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 256, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 3, /* Branch cost */
1164 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1165 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1166 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1167 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1168 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1169 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Stringop tables: only the 32-bit rows are populated; the 64-bit rows are
   dummies since generic32 never compiles 64-bit code.  */
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173 DUMMY_STRINGOP_ALGS},
1174 1, /* scalar_stmt_cost. */
1175 1, /* scalar load_cost. */
1176 1, /* scalar_store_cost. */
1177 1, /* vec_stmt_cost. */
1178 1, /* vec_to_scalar_cost. */
1179 1, /* scalar_to_vec_cost. */
1180 1, /* vec_align_load_cost. */
1181 2, /* vec_unalign_load_cost. */
1182 1, /* vec_store_cost. */
1183 3, /* cond_taken_branch_cost. */
1184 1, /* cond_not_taken_branch_cost. */
/* Pointer to the cost table in effect; initialized to pentium_cost and
   presumably re-pointed at the table matching -mtune during option
   processing -- TODO confirm against the options override code.  */
1187 const struct processor_costs *ix86_cost = &pentium_cost;
1189 /* Processor feature/optimization bitmasks.  One bit per PROCESSOR_* enum
1190 value; combined masks group related CPU families. */
1190 #define m_386 (1<<PROCESSOR_I386)
1191 #define m_486 (1<<PROCESSOR_I486)
1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1195 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1196 #define m_CORE2 (1<<PROCESSOR_CORE2)
1198 #define m_GEODE (1<<PROCESSOR_GEODE)
1199 #define m_K6 (1<<PROCESSOR_K6)
1200 #define m_K6_GEODE (m_K6 | m_GEODE)
1201 #define m_K8 (1<<PROCESSOR_K8)
1202 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1203 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1204 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1205 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1210 /* Generic instruction choice should be common subset of supported CPUs
1211 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1214 /* Feature tests against the various tunings. */
1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1217 /* Feature tests against the various tunings used to create ix86_tune_features
1218 based on the processor mask.  Each entry is a bitmask of m_* processor
bits; entries must stay in the same order as the X86_TUNE_* enum.
NOTE(review): numerous mask initializer lines are missing from this
extract (e.g. originals 1231, 1243, 1246, 1265, 1277, 1283, 1286, 1289,
1292, 1305, 1311, 1314, 1317, 1353, 1359, 1362, 1377, 1380, 1386, 1408,
1411, 1415, 1427, 1431, 1436, 1444 and the closing "};") -- the array no
longer lines up with X86_TUNE_LAST; restore from upstream i386.c.  */
1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1220 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1221 negatively, so enabling for Generic64 seems like good code size
1222 tradeoff. We can't enable it for 32bit generic because it does not
1223 work well with PPro base chips. */
1224 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1226 /* X86_TUNE_PUSH_MEMORY */
1227 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228 | m_NOCONA | m_CORE2 | m_GENERIC,
1230 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1233 /* X86_TUNE_UNROLL_STRLEN */
1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1236 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1239 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1240 on simulation result. But after P4 was made, no performance benefit
1241 was observed with branch hints. It also increases the code size.
1242 As a result, icc never generates branch hints. */
1245 /* X86_TUNE_DOUBLE_WITH_ADD */
1248 /* X86_TUNE_USE_SAHF */
1249 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250 | m_NOCONA | m_CORE2 | m_GENERIC,
1252 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253 partial dependencies. */
1254 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1257 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1258 register stalls on Generic32 compilation setting as well. However
1259 in current implementation the partial register stalls are not eliminated
1260 very well - they can be introduced via subregs synthesized by combine
1261 and can happen in caller/callee saving sequences. Because this option
1262 pays back little on PPro based chips and is in conflict with partial reg
1263 dependencies used by Athlon/P4 based chips, it is better to leave it off
1264 for generic32 for now. */
1267 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268 m_CORE2 | m_GENERIC,
1270 /* X86_TUNE_USE_HIMODE_FIOP */
1271 m_386 | m_486 | m_K6_GEODE,
1273 /* X86_TUNE_USE_SIMODE_FIOP */
1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1276 /* X86_TUNE_USE_MOV0 */
1279 /* X86_TUNE_USE_CLTD */
1280 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1282 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1285 /* X86_TUNE_SPLIT_LONG_MOVES */
1288 /* X86_TUNE_READ_MODIFY_WRITE */
1291 /* X86_TUNE_READ_MODIFY */
1294 /* X86_TUNE_PROMOTE_QIMODE */
1295 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296 | m_GENERIC /* | m_PENT4 ? */,
1298 /* X86_TUNE_FAST_PREFIX */
1299 ~(m_PENT | m_486 | m_386),
1301 /* X86_TUNE_SINGLE_STRINGOP */
1302 m_386 | m_PENT4 | m_NOCONA,
1304 /* X86_TUNE_QIMODE_MATH */
1307 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1309 might be considered for Generic32 if our scheme for avoiding partial
1310 stalls was more effective. */
1313 /* X86_TUNE_PROMOTE_QI_REGS */
1316 /* X86_TUNE_PROMOTE_HI_REGS */
1319 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322 /* X86_TUNE_ADD_ESP_8 */
1323 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_4 */
1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329 /* X86_TUNE_SUB_ESP_8 */
1330 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1333 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334 for DFmode copies */
1335 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336 | m_GENERIC | m_GEODE),
1338 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1341 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1342 conflict here in between PPro/Pentium4 based chips that thread 128bit
1343 SSE registers as single units versus K8 based chips that divide SSE
1344 registers to two 64bit halves. This knob promotes all store destinations
1345 to be 128bit to allow register renaming on 128bit SSE units, but usually
1346 results in one extra microop on 64bit SSE units. Experimental results
1347 shows that disabling this option on P4 brings over 20% SPECfp regression,
1348 while enabling it on K8 brings roughly 2.4% regression that can be partly
1349 masked by careful scheduling of moves. */
1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1352 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1355 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1356 are resolved on SSE register parts instead of whole registers, so we may
1357 maintain just lower part of scalar values in proper format leaving the
1358 upper part undefined. */
1361 /* X86_TUNE_SSE_TYPELESS_STORES */
1364 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365 m_PPRO | m_PENT4 | m_NOCONA,
1367 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_PROLOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_EPILOGUE_USING_MOVE */
1374 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1376 /* X86_TUNE_SHIFT1 */
1379 /* X86_TUNE_USE_FFREEP */
1382 /* X86_TUNE_INTER_UNIT_MOVES */
1383 ~(m_AMD_MULTIPLE | m_GENERIC),
1385 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1388 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389 than 4 branch instructions in the 16 byte window. */
1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_SCHEDULE */
1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1395 /* X86_TUNE_USE_BT */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_USE_INCDEC */
1399 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1401 /* X86_TUNE_PAD_RETURNS */
1402 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_EXT_80387_CONSTANTS */
1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1407 /* X86_TUNE_SHORTEN_X87_SSE */
1410 /* X86_TUNE_AVOID_VECTOR_DECODE */
1413 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1414 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1417 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418 vector path on AMD machines. */
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1423 m_K8 | m_GENERIC64 | m_AMDFAM10,
1425 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1429 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430 but one byte longer. */
1433 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1434 operand that cannot be represented using a modRM byte. The XOR
1435 replacement is long decoded, so this split helps here as well. */
1438 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1440 m_AMDFAM10 | m_GENERIC,
1442 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443 from integer to FP. */
1446 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447 with a subsequent conditional jump instruction into a single
1448 compare-and-branch uop. */
1452 /* Feature tests against the various architecture variations. */
1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1455 /* Feature tests against the various architecture variations, used to create
1456 ix86_arch_features based on the processor mask.
NOTE(review): the mask initializers for CMPXCHG, CMPXCHG8B, XADD and
BSWAP, plus the closing "};", are missing from this extract -- restore
from upstream i386.c.  */
1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1459 ~(m_386 | m_486 | m_PENT | m_K6),
1461 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1464 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPUs on which -maccumulate-outgoing-args is the default.  */
1474 static const unsigned int x86_accumulate_outgoing_args
1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* CPUs on which the 80387 "fancy math" builtins are always usable.  */
1477 static const unsigned int x86_arch_always_fancy_math_387
1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479 | m_NOCONA | m_CORE2 | m_GENERIC;
/* stringop_alg override; no_stringop means "use the cost tables above".  */
1481 static enum stringop_alg stringop_alg = no_stringop;
1483 /* In case the average insn count for single function invocation is
1484 lower than this constant, emit fast (but longer) prologue and
1486 #define FAST_PROLOGUE_INSN_COUNT 20
1488 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494 REGNO. Used by REGNO_REG_CLASS in i386.h.
NOTE(review): the opening "{", some interior rows (FP/SSE/MMX group
comments) and the closing "};" are missing from this extract.  */
1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1498 /* ax, dx, cx, bx */
1499 AREG, DREG, CREG, BREG,
1500 /* si, di, bp, sp */
1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1507 /* flags, fpsr, fpcr, frame */
1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518 /* SSE REX registers */
1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1523 /* The "default" register map used in 32bit mode.  Maps gcc hard register
numbers to DWARF/stabs debug register numbers.  */
1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Hard register numbers used to pass integer arguments in the SysV
x86-64 ABI (rdi, rsi, rdx, rcx, r8, r9).  */
1536 static int const x86_64_int_parameter_registers[6] =
1538 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1539 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Integer argument registers for the Microsoft x64 calling convention.  */
1542 static int const x86_64_ms_abi_int_parameter_registers[4] =
1544 2 /*RCX*/, 1 /*RDX*/,
1545 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Integer value-return registers (rax, rdx, plus rdi/rsi slots).  */
1548 static int const x86_64_int_return_registers[4] =
1550 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1553 /* The "default" register map used in 64bit mode. */
1554 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1556 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1557 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1558 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1559 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1560 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1561 8,9,10,11,12,13,14,15, /* extended integer registers */
1562 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1565 /* Define the register numbers to be used in Dwarf debugging information.
1566 The SVR4 reference port C compiler uses the following register numbers
1567 in its Dwarf output code:
1568 0 for %eax (gcc regno = 0)
1569 1 for %ecx (gcc regno = 2)
1570 2 for %edx (gcc regno = 1)
1571 3 for %ebx (gcc regno = 3)
1572 4 for %esp (gcc regno = 7)
1573 5 for %ebp (gcc regno = 6)
1574 6 for %esi (gcc regno = 4)
1575 7 for %edi (gcc regno = 5)
1576 The following three DWARF register numbers are never generated by
1577 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1578 believes these numbers have these meanings.
1579 8 for %eip (no gcc equivalent)
1580 9 for %eflags (gcc regno = 17)
1581 10 for %trapno (no gcc equivalent)
1582 It is not at all clear how we should number the FP stack registers
1583 for the x86 architecture. If the version of SDB on x86/svr4 were
1584 a bit less brain dead with respect to floating-point then we would
1585 have a precedent to follow with respect to DWARF register numbers
1586 for x86 FP registers, but the SDB on x86/svr4 is so completely
1587 broken with respect to FP registers that it is hardly worth thinking
1588 of it as something to strive for compatibility with.
1589 The version of x86/svr4 SDB I have at the moment does (partially)
1590 seem to believe that DWARF register number 11 is associated with
1591 the x86 register %st(0), but that's about all. Higher DWARF
1592 register numbers don't seem to be associated with anything in
1593 particular, and even for DWARF regno 11, SDB only seems to under-
1594 stand that it should say that a variable lives in %st(0) (when
1595 asked via an `=' command) if we said it was in DWARF regno 11,
1596 but SDB still prints garbage when asked for the value of the
1597 variable in question (via a `/' command).
1598 (Also note that the labels SDB prints for various FP stack regs
1599 when doing an `x' command are all wrong.)
1600 Note that these problems generally don't affect the native SVR4
1601 C compiler because it doesn't allow the use of -O with -g and
1602 because when it is *not* optimizing, it allocates a memory
1603 location for each floating-point variable, and the memory
1604 location is what gets described in the DWARF AT_location
1605 attribute for the variable in question.
1606 Regardless of the severe mental illness of the x86/svr4 SDB, we
1607 do something sensible here and we use the following DWARF
1608 register numbers. Note that these are all stack-top-relative
1610 11 for %st(0) (gcc regno = 8)
1611 12 for %st(1) (gcc regno = 9)
1612 13 for %st(2) (gcc regno = 10)
1613 14 for %st(3) (gcc regno = 11)
1614 15 for %st(4) (gcc regno = 12)
1615 16 for %st(5) (gcc regno = 13)
1616 17 for %st(6) (gcc regno = 14)
1617 18 for %st(7) (gcc regno = 15)
*/
/* NOTE(review): the opening "{" and closing "};" of this initializer are
   missing from this extract -- restore from upstream i386.c.  */
1619 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1621 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1622 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1623 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1624 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1625 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1626 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1630 /* Test and compare insns in i386.md store the information needed to
1631 generate branch and scc insns here. */
1633 rtx ix86_compare_op0 = NULL_RTX;
1634 rtx ix86_compare_op1 = NULL_RTX;
1635 rtx ix86_compare_emitted = NULL_RTX;
1637 /* Define the structure for the machine field in struct function. */
1639 struct stack_local_entry GTY(())
1641 unsigned short mode;
1644 struct stack_local_entry *next;
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1664 [va_arg registers] (
1665 > to_allocate <- FRAME_POINTER
1677 HOST_WIDE_INT frame;
1679 int outgoing_arguments_size;
1682 HOST_WIDE_INT to_allocate;
1683 /* The offsets relative to ARG_POINTER. */
1684 HOST_WIDE_INT frame_pointer_offset;
1685 HOST_WIDE_INT hard_frame_pointer_offset;
1686 HOST_WIDE_INT stack_pointer_offset;
1688 /* When save_regs_using_mov is set, emit prologue using
1689 move instead of push instructions. */
1690 bool save_regs_using_mov;
/* File-scope state produced by option processing (override_options and
   friends) and consumed throughout the backend.  */
1693 /* Code model option. */
1694 enum cmodel ix86_cmodel;
/* Assembler syntax to emit; AT&T syntax is the default.  */
1696 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* Thread-local storage dialect; GNU is the default.  */
1698 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1700 /* Which unit we are generating floating point math for. */
1701 enum fpmath_unit ix86_fpmath;
1703 /* Which cpu are we scheduling for. */
1704 enum attr_cpu ix86_schedule;
1706 /* Which cpu are we optimizing for. */
1707 enum processor_type ix86_tune;
1709 /* Which instruction set architecture to use. */
1710 enum processor_type ix86_arch;
1712 /* true if sse prefetch instruction is not NOOP. */
1713 int x86_prefetch_sse;
1715 /* ix86_regparm_string as a number */
1716 static int ix86_regparm;
1718 /* -mstackrealign option */
1719 extern int ix86_force_align_arg_pointer;
1720 static const char ix86_force_align_arg_pointer_string[]
1721 = "force_align_arg_pointer";
/* Insn-generator callbacks returning rtx; presumably selected once per
   32/64-bit mode so the rest of the backend need not test TARGET_64BIT —
   confirm where they are assigned (elided in this extract).  */
1723 static rtx (*ix86_gen_leave) (void);
1724 static rtx (*ix86_gen_pop1) (rtx);
1725 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1726 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1727 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1728 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1729 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1730 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1732 /* Preferred alignment for stack boundary in bits. */
1733 unsigned int ix86_preferred_stack_boundary;
1735 /* Alignment for incoming stack boundary in bits specified at
1737 static unsigned int ix86_user_incoming_stack_boundary;
1739 /* Default alignment for incoming stack boundary in bits. */
1740 static unsigned int ix86_default_incoming_stack_boundary;
1742 /* Alignment for incoming stack boundary in bits. */
1743 unsigned int ix86_incoming_stack_boundary;
1745 /* Values 1-5: see jump.c */
1746 int ix86_branch_cost;
1748 /* Calling abi specific va_list type nodes. */
1749 static GTY(()) tree sysv_va_list_type_node;
1750 static GTY(()) tree ms_va_list_type_node;
1752 /* Variables which are this size or smaller are put in the data/bss
1753 or ldata/lbss sections. */
1755 int ix86_section_threshold = 65536;
1757 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1758 char internal_label_prefix[16];
1759 int internal_label_prefix_len;
1761 /* Fence to use after loop using movnt. */
1764 /* Register class used for passing given 64bit part of the argument.
1765 These represent classes as documented by the PS ABI, with the exception
1766 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1767 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1769 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1770 whenever possible (upper half does contain padding). */
/* NOTE(review): several enumerators of this enum are elided in this
   extract; the name table below lists the full set.  */
1771 enum x86_64_reg_class
1774 X86_64_INTEGER_CLASS,
1775 X86_64_INTEGERSI_CLASS,
1783 X86_64_COMPLEX_X87_CLASS,
/* Human-readable names, indexed by x86_64_reg_class (for debug dumps).  */
1786 static const char * const x86_64_reg_class_name[] =
1788 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1789 "sseup", "x87", "x87up", "cplx87", "no"
/* Maximum number of eightbyte classes an argument can occupy per the
   psABI classification algorithm.  */
1792 #define MAX_CLASSES 4
1794 /* Table of constants used by fldpi, fldln2, etc.... */
1795 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily-initialized guard for the table above.  */
1796 static bool ext_80387_constants_init = 0;
/* Forward declarations for functions defined later in this file.  */
1799 static struct machine_function * ix86_init_machine_status (void);
1800 static rtx ix86_function_value (const_tree, const_tree, bool);
1801 static int ix86_function_regparm (const_tree, const_tree);
1802 static void ix86_compute_frame_layout (struct ix86_frame *);
1803 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1805 static void ix86_add_new_builtins (int);
/* Indices into the array of strings saved/restored by the
   attribute(target) machinery below.  */
1807 enum ix86_function_specific_strings
1809 IX86_FUNCTION_SPECIFIC_ARCH,
1810 IX86_FUNCTION_SPECIFIC_TUNE,
1811 IX86_FUNCTION_SPECIFIC_FPMATH,
/* Number of entries; keep last.  */
1812 IX86_FUNCTION_SPECIFIC_MAX
/* Option printing and per-function target-attribute support.  */
1815 static char *ix86_target_string (int, int, const char *, const char *,
1816 const char *, bool);
1817 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1818 static void ix86_function_specific_save (struct cl_target_option *);
1819 static void ix86_function_specific_restore (struct cl_target_option *);
1820 static void ix86_function_specific_print (FILE *, int,
1821 struct cl_target_option *);
1822 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1823 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1824 static bool ix86_can_inline_p (tree, tree);
1825 static void ix86_set_current_function (tree);
1828 /* The svr4 ABI for the i386 says that records and unions are returned
1830 #ifndef DEFAULT_PCC_STRUCT_RETURN
1831 #define DEFAULT_PCC_STRUCT_RETURN 1
1834 /* Whether -mtune= or -march= were specified */
1835 static int ix86_tune_defaulted;
1836 static int ix86_arch_specified;
1838 /* Bit flags that specify the ISA we are compiling for. */
1839 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1841 /* A mask of ix86_isa_flags that includes bit X if X
1842 was set or cleared on the command line. */
1843 static int ix86_isa_flags_explicit;
1845 /* Define a set of ISAs which are available when a given ISA is
1846 enabled. MMX and SSE ISAs are handled separately. */
/* Each _SET macro is the transitive closure of the ISAs implied by
   enabling that ISA (e.g. enabling SSE3 also enables SSE2 and SSE).  */
1848 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1849 #define OPTION_MASK_ISA_3DNOW_SET \
1850 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1852 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1853 #define OPTION_MASK_ISA_SSE2_SET \
1854 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1855 #define OPTION_MASK_ISA_SSE3_SET \
1856 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1857 #define OPTION_MASK_ISA_SSSE3_SET \
1858 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1859 #define OPTION_MASK_ISA_SSE4_1_SET \
1860 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1861 #define OPTION_MASK_ISA_SSE4_2_SET \
1862 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1863 #define OPTION_MASK_ISA_AVX_SET \
1864 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1865 #define OPTION_MASK_ISA_FMA_SET \
1866 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1868 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1870 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1872 #define OPTION_MASK_ISA_SSE4A_SET \
1873 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1874 #define OPTION_MASK_ISA_SSE5_SET \
1875 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1877 /* AES and PCLMUL need SSE2 because they use xmm registers */
1878 #define OPTION_MASK_ISA_AES_SET \
1879 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1880 #define OPTION_MASK_ISA_PCLMUL_SET \
1881 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
/* ABM implies POPCNT.  (Uses the raw POPCNT mask; equivalent to
   OPTION_MASK_ISA_POPCNT_SET, which is defined as the same bit below —
   consider using the _SET name for consistency.)  */
1883 #define OPTION_MASK_ISA_ABM_SET \
1884 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1885 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1886 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1887 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1889 /* Define a set of ISAs which aren't available when a given ISA is
1890 disabled. MMX and SSE ISAs are handled separately. */
/* Mirror image of the _SET closure: each _UNSET macro names the ISAs that
   depend on the one being disabled (e.g. disabling SSE2 also disables
   SSE3 and everything above it).  */
1892 #define OPTION_MASK_ISA_MMX_UNSET \
1893 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1894 #define OPTION_MASK_ISA_3DNOW_UNSET \
1895 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1896 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1898 #define OPTION_MASK_ISA_SSE_UNSET \
1899 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1900 #define OPTION_MASK_ISA_SSE2_UNSET \
1901 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1902 #define OPTION_MASK_ISA_SSE3_UNSET \
1903 (OPTION_MASK_ISA_SSE3 \
1904 | OPTION_MASK_ISA_SSSE3_UNSET \
1905 | OPTION_MASK_ISA_SSE4A_UNSET )
1906 #define OPTION_MASK_ISA_SSSE3_UNSET \
1907 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1908 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1909 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1910 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1911 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1912 #define OPTION_MASK_ISA_AVX_UNSET \
1913 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1914 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1916 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1918 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1920 #define OPTION_MASK_ISA_SSE4A_UNSET \
1921 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1922 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1923 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1924 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1925 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1926 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1927 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1928 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
1930 /* Vectorization library interface and handlers. */
1931 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1932 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1933 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1935 /* Processor target table, indexed by processor number */
/* NOTE(review): the opening of `struct ptt' is elided in this extract;
   the six fields below match the six initializers per row of the table.  */
1938 const struct processor_costs *cost; /* Processor costs */
1939 const int align_loop; /* Default alignments. */
1940 const int align_loop_max_skip;
1941 const int align_jump;
1942 const int align_jump_max_skip;
1943 const int align_func;
/* One row per PROCESSOR_* value; order must match enum processor_type.  */
1946 static const struct ptt processor_target_table[PROCESSOR_max] =
1948 {&i386_cost, 4, 3, 4, 3, 4},
1949 {&i486_cost, 16, 15, 16, 15, 16},
1950 {&pentium_cost, 16, 7, 16, 7, 16},
1951 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1952 {&geode_cost, 0, 0, 0, 0, 0},
1953 {&k6_cost, 32, 7, 32, 7, 32},
1954 {&athlon_cost, 16, 7, 16, 7, 16},
1955 {&pentium4_cost, 0, 0, 0, 0, 0},
1956 {&k8_cost, 16, 7, 16, 7, 16},
1957 {&nocona_cost, 0, 0, 0, 0, 0},
1958 {&core2_cost, 16, 10, 16, 10, 16},
1959 {&generic32_cost, 16, 7, 16, 7, 16},
1960 {&generic64_cost, 16, 10, 16, 10, 16},
1961 {&amdfam10_cost, 32, 24, 32, 7, 32}
/* Names indexed by TARGET_CPU_DEFAULT_*; initializer elided in this
   extract.  */
1964 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1989 /* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): the `switch (code)' and its OPT_m* case labels are elided
   in this extract.  Each four-statement group below handles one
   -mFOO / -mno-FOO option pair (inferred from the mask names — confirm
   against the elided case labels): enabling applies the _SET implication
   closure, disabling clears the _UNSET dependency closure, and in both
   directions the touched bits are recorded in ix86_isa_flags_explicit so
   -march= defaults never override an explicit user choice.  */
1992 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
/* -mmmx / -mno-mmx */
1999 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2000 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2004 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2005 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
/* -m3dnow / -mno-3dnow */
2012 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2013 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2017 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2018 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
/* -msse / -mno-sse */
2028 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2029 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2033 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2034 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
/* -msse2 / -mno-sse2 */
2041 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2042 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2046 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2047 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
/* -msse3 / -mno-sse3 */
2054 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2055 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2059 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2060 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
/* -mssse3 / -mno-ssse3 */
2067 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2068 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2072 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2073 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
/* -msse4.1 / -mno-sse4.1 */
2080 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2081 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2085 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
/* -msse4.2 / -mno-sse4.2 */
2093 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2094 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2098 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
/* -mavx / -mno-avx */
2106 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2107 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2111 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2112 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
/* -mfma / -mno-fma */
2119 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2120 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2124 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
/* -msse4 / -mno-sse4 (aliases for the 4.2/4.1 closures) */
2130 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2131 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2135 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2136 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
/* -msse4a / -mno-sse4a */
2142 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2147 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
/* -msse5 / -mno-sse5 */
2155 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2160 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
/* -mabm / -mno-abm */
2168 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2173 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
/* -mpopcnt / -mno-popcnt */
2181 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2186 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
/* -msahf / -mno-sahf */
2194 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2195 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2199 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
/* -mcx16 / -mno-cx16 */
2207 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2208 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2212 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
/* -maes / -mno-aes */
2220 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
/* -mpclmul / -mno-pclmul */
2233 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2238 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2248 /* Return a string the documents the current -m options. The caller is
2249 responsible for freeing the string. */
2252 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2253 const char *fpmath, bool add_nl_p)
2255 struct ix86_target_opts
2257 const char *option; /* option string */
2258 int mask; /* isa mask options */
2261 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2262 preceding options while match those first. */
2263 static struct ix86_target_opts isa_opts[] =
2265 { "-m64", OPTION_MASK_ISA_64BIT },
2266 { "-msse5", OPTION_MASK_ISA_SSE5 },
2267 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2268 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2269 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2270 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2271 { "-msse3", OPTION_MASK_ISA_SSE3 },
2272 { "-msse2", OPTION_MASK_ISA_SSE2 },
2273 { "-msse", OPTION_MASK_ISA_SSE },
2274 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2275 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2276 { "-mmmx", OPTION_MASK_ISA_MMX },
2277 { "-mabm", OPTION_MASK_ISA_ABM },
2278 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2279 { "-maes", OPTION_MASK_ISA_AES },
2280 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2284 static struct ix86_target_opts flag_opts[] =
2286 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2287 { "-m80387", MASK_80387 },
2288 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2289 { "-malign-double", MASK_ALIGN_DOUBLE },
2290 { "-mcld", MASK_CLD },
2291 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2292 { "-mieee-fp", MASK_IEEE_FP },
2293 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2294 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2295 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2296 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2297 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2298 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2299 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2300 { "-mno-red-zone", MASK_NO_RED_ZONE },
2301 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2302 { "-mrecip", MASK_RECIP },
2303 { "-mrtd", MASK_RTD },
2304 { "-msseregparm", MASK_SSEREGPARM },
2305 { "-mstack-arg-probe", MASK_STACK_PROBE },
2306 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2309 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2312 char target_other[40];
2321 memset (opts, '\0', sizeof (opts));
2323 /* Add -march= option. */
2326 opts[num][0] = "-march=";
2327 opts[num++][1] = arch;
2330 /* Add -mtune= option. */
2333 opts[num][0] = "-mtune=";
2334 opts[num++][1] = tune;
2337 /* Pick out the options in isa options. */
2338 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2340 if ((isa & isa_opts[i].mask) != 0)
2342 opts[num++][0] = isa_opts[i].option;
2343 isa &= ~ isa_opts[i].mask;
2347 if (isa && add_nl_p)
2349 opts[num++][0] = isa_other;
2350 sprintf (isa_other, "(other isa: 0x%x)", isa);
2353 /* Add flag options. */
2354 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2356 if ((flags & flag_opts[i].mask) != 0)
2358 opts[num++][0] = flag_opts[i].option;
2359 flags &= ~ flag_opts[i].mask;
2363 if (flags && add_nl_p)
2365 opts[num++][0] = target_other;
2366 sprintf (target_other, "(other flags: 0x%x)", isa);
2369 /* Add -fpmath= option. */
2372 opts[num][0] = "-mfpmath=";
2373 opts[num++][1] = fpmath;
2380 gcc_assert (num < ARRAY_SIZE (opts));
2382 /* Size the string. */
2384 sep_len = (add_nl_p) ? 3 : 1;
2385 for (i = 0; i < num; i++)
2388 for (j = 0; j < 2; j++)
2390 len += strlen (opts[i][j]);
2393 /* Build the string. */
2394 ret = ptr = (char *) xmalloc (len);
2397 for (i = 0; i < num; i++)
2401 for (j = 0; j < 2; j++)
2402 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2409 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2417 for (j = 0; j < 2; j++)
2420 memcpy (ptr, opts[i][j], len2[j]);
2422 line_len += len2[j];
2427 gcc_assert (ret + len >= ptr);
2432 /* Function that is callable from the debugger to print the current
/* NOTE(review): the function's braces and if/else structure are elided in
   this extract.  ix86_target_string returns a heap-allocated string owned
   by the caller — confirm that the elided lines free it.  */
2435 ix86_debug_options (void)
2437 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2438 ix86_arch_string, ix86_tune_string,
2439 ix86_fpmath_string, true);
/* Presumably the non-NULL branch: dump the reconstructed option line.  */
2443 fprintf (stderr, "%s\n\n", opts);
/* Presumably the NULL branch: no -m options are in effect.  */
2447 fprintf (stderr, "<no options>\n\n");
2452 /* Sometimes certain combinations of command options do not make
2453 sense on a particular target machine. You can define a macro
2454 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2455 defined, is executed once just after all the command options have
2458 Don't use this macro to turn on various extra optimizations for
2459 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2462 override_options (bool main_args_p)
2465 unsigned int ix86_arch_mask, ix86_tune_mask;
2470 /* Comes from final.c -- no real reason to change it. */
2471 #define MAX_CODE_ALIGN 16
2479 PTA_PREFETCH_SSE = 1 << 4,
2481 PTA_3DNOW_A = 1 << 6,
2485 PTA_POPCNT = 1 << 10,
2487 PTA_SSE4A = 1 << 12,
2488 PTA_NO_SAHF = 1 << 13,
2489 PTA_SSE4_1 = 1 << 14,
2490 PTA_SSE4_2 = 1 << 15,
2493 PTA_PCLMUL = 1 << 18,
2500 const char *const name; /* processor name or nickname. */
2501 const enum processor_type processor;
2502 const enum attr_cpu schedule;
2503 const unsigned /*enum pta_flags*/ flags;
2505 const processor_alias_table[] =
2507 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2508 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2509 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2510 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2511 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2512 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2513 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2514 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2515 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2516 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2517 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2518 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2519 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2521 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2523 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2524 PTA_MMX | PTA_SSE | PTA_SSE2},
2525 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2526 PTA_MMX |PTA_SSE | PTA_SSE2},
2527 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2528 PTA_MMX | PTA_SSE | PTA_SSE2},
2529 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2530 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2531 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2532 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2533 | PTA_CX16 | PTA_NO_SAHF},
2534 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2535 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2536 | PTA_SSSE3 | PTA_CX16},
2537 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2538 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2539 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2540 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2541 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2542 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2544 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2545 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2546 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2547 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2548 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2549 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2550 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2551 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2552 {"x86-64", PROCESSOR_K8, CPU_K8,
2553 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2554 {"k8", PROCESSOR_K8, CPU_K8,
2555 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2556 | PTA_SSE2 | PTA_NO_SAHF},
2557 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2558 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2559 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2560 {"opteron", PROCESSOR_K8, CPU_K8,
2561 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2562 | PTA_SSE2 | PTA_NO_SAHF},
2563 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2564 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2565 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2566 {"athlon64", PROCESSOR_K8, CPU_K8,
2567 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2568 | PTA_SSE2 | PTA_NO_SAHF},
2569 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2570 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2571 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2572 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2573 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2574 | PTA_SSE2 | PTA_NO_SAHF},
2575 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2576 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2577 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2578 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2579 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2580 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2581 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2582 0 /* flags are only used for -march switch. */ },
2583 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2584 PTA_64BIT /* flags are only used for -march switch. */ },
2587 int const pta_size = ARRAY_SIZE (processor_alias_table);
2589 /* Set up prefix/suffix so the error messages refer to either the command
2590 line argument, or the attribute(target). */
2599 prefix = "option(\"";
2604 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2605 SUBTARGET_OVERRIDE_OPTIONS;
2608 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2609 SUBSUBTARGET_OVERRIDE_OPTIONS;
2612 /* -fPIC is the default for x86_64. */
2613 if (TARGET_MACHO && TARGET_64BIT)
2616 /* Set the default values for switches whose default depends on TARGET_64BIT
2617 in case they weren't overwritten by command line options. */
2620 /* Mach-O doesn't support omitting the frame pointer for now. */
2621 if (flag_omit_frame_pointer == 2)
2622 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2623 if (flag_asynchronous_unwind_tables == 2)
2624 flag_asynchronous_unwind_tables = 1;
2625 if (flag_pcc_struct_return == 2)
2626 flag_pcc_struct_return = 0;
2630 if (flag_omit_frame_pointer == 2)
2631 flag_omit_frame_pointer = 0;
2632 if (flag_asynchronous_unwind_tables == 2)
2633 flag_asynchronous_unwind_tables = 0;
2634 if (flag_pcc_struct_return == 2)
2635 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2638 /* Need to check -mtune=generic first. */
2639 if (ix86_tune_string)
2641 if (!strcmp (ix86_tune_string, "generic")
2642 || !strcmp (ix86_tune_string, "i686")
2643 /* As special support for cross compilers we read -mtune=native
2644 as -mtune=generic. With native compilers we won't see the
2645 -mtune=native, as it was changed by the driver. */
2646 || !strcmp (ix86_tune_string, "native"))
2649 ix86_tune_string = "generic64";
2651 ix86_tune_string = "generic32";
2653 /* If this call is for setting the option attribute, allow the
2654 generic32/generic64 that was previously set. */
2655 else if (!main_args_p
2656 && (!strcmp (ix86_tune_string, "generic32")
2657 || !strcmp (ix86_tune_string, "generic64")))
2659 else if (!strncmp (ix86_tune_string, "generic", 7))
2660 error ("bad value (%s) for %stune=%s %s",
2661 ix86_tune_string, prefix, suffix, sw);
2665 if (ix86_arch_string)
2666 ix86_tune_string = ix86_arch_string;
2667 if (!ix86_tune_string)
2669 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2670 ix86_tune_defaulted = 1;
2673 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2674 need to use a sensible tune option. */
2675 if (!strcmp (ix86_tune_string, "generic")
2676 || !strcmp (ix86_tune_string, "x86-64")
2677 || !strcmp (ix86_tune_string, "i686"))
2680 ix86_tune_string = "generic64";
2682 ix86_tune_string = "generic32";
2685 if (ix86_stringop_string)
2687 if (!strcmp (ix86_stringop_string, "rep_byte"))
2688 stringop_alg = rep_prefix_1_byte;
2689 else if (!strcmp (ix86_stringop_string, "libcall"))
2690 stringop_alg = libcall;
2691 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2692 stringop_alg = rep_prefix_4_byte;
2693 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2695 /* rep; movq isn't available in 32-bit code. */
2696 stringop_alg = rep_prefix_8_byte;
2697 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2698 stringop_alg = loop_1_byte;
2699 else if (!strcmp (ix86_stringop_string, "loop"))
2700 stringop_alg = loop;
2701 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2702 stringop_alg = unrolled_loop;
2704 error ("bad value (%s) for %sstringop-strategy=%s %s",
2705 ix86_stringop_string, prefix, suffix, sw);
2707 if (!strcmp (ix86_tune_string, "x86-64"))
2708 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2709 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2710 prefix, suffix, prefix, suffix, prefix, suffix);
2712 if (!ix86_arch_string)
2713 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2715 ix86_arch_specified = 1;
2717 if (!strcmp (ix86_arch_string, "generic"))
2718 error ("generic CPU can be used only for %stune=%s %s",
2719 prefix, suffix, sw);
2720 if (!strncmp (ix86_arch_string, "generic", 7))
2721 error ("bad value (%s) for %sarch=%s %s",
2722 ix86_arch_string, prefix, suffix, sw);
2724 if (ix86_cmodel_string != 0)
2726 if (!strcmp (ix86_cmodel_string, "small"))
2727 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2728 else if (!strcmp (ix86_cmodel_string, "medium"))
2729 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2730 else if (!strcmp (ix86_cmodel_string, "large"))
2731 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2733 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2734 else if (!strcmp (ix86_cmodel_string, "32"))
2735 ix86_cmodel = CM_32;
2736 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2737 ix86_cmodel = CM_KERNEL;
2739 error ("bad value (%s) for %scmodel=%s %s",
2740 ix86_cmodel_string, prefix, suffix, sw);
2744 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2745 use of rip-relative addressing. This eliminates fixups that
2746 would otherwise be needed if this object is to be placed in a
2747 DLL, and is essentially just as efficient as direct addressing. */
2748 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2749 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2750 else if (TARGET_64BIT)
2751 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2753 ix86_cmodel = CM_32;
2755 if (ix86_asm_string != 0)
2758 && !strcmp (ix86_asm_string, "intel"))
2759 ix86_asm_dialect = ASM_INTEL;
2760 else if (!strcmp (ix86_asm_string, "att"))
2761 ix86_asm_dialect = ASM_ATT;
2763 error ("bad value (%s) for %sasm=%s %s",
2764 ix86_asm_string, prefix, suffix, sw);
2766 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2767 error ("code model %qs not supported in the %s bit mode",
2768 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2769 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2770 sorry ("%i-bit mode not compiled in",
2771 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2773 for (i = 0; i < pta_size; i++)
2774 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2776 ix86_schedule = processor_alias_table[i].schedule;
2777 ix86_arch = processor_alias_table[i].processor;
2778 /* Default cpu tuning to the architecture. */
2779 ix86_tune = ix86_arch;
2781 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2782 error ("CPU you selected does not support x86-64 "
2785 if (processor_alias_table[i].flags & PTA_MMX
2786 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2787 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2788 if (processor_alias_table[i].flags & PTA_3DNOW
2789 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2790 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2791 if (processor_alias_table[i].flags & PTA_3DNOW_A
2792 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2793 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2794 if (processor_alias_table[i].flags & PTA_SSE
2795 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2796 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2797 if (processor_alias_table[i].flags & PTA_SSE2
2798 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2799 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2800 if (processor_alias_table[i].flags & PTA_SSE3
2801 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2802 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2803 if (processor_alias_table[i].flags & PTA_SSSE3
2804 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2805 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2806 if (processor_alias_table[i].flags & PTA_SSE4_1
2807 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2808 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2809 if (processor_alias_table[i].flags & PTA_SSE4_2
2810 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2811 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2812 if (processor_alias_table[i].flags & PTA_AVX
2813 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2814 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2815 if (processor_alias_table[i].flags & PTA_FMA
2816 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2817 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2818 if (processor_alias_table[i].flags & PTA_SSE4A
2819 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2820 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2821 if (processor_alias_table[i].flags & PTA_SSE5
2822 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2823 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2824 if (processor_alias_table[i].flags & PTA_ABM
2825 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2826 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2827 if (processor_alias_table[i].flags & PTA_CX16
2828 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2829 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2830 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2831 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2832 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2833 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2834 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2835 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2836 if (processor_alias_table[i].flags & PTA_AES
2837 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2838 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2839 if (processor_alias_table[i].flags & PTA_PCLMUL
2840 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2841 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2842 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2843 x86_prefetch_sse = true;
2849 error ("bad value (%s) for %sarch=%s %s",
2850 ix86_arch_string, prefix, suffix, sw);
2852 ix86_arch_mask = 1u << ix86_arch;
2853 for (i = 0; i < X86_ARCH_LAST; ++i)
2854 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2856 for (i = 0; i < pta_size; i++)
2857 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2859 ix86_schedule = processor_alias_table[i].schedule;
2860 ix86_tune = processor_alias_table[i].processor;
2861 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2863 if (ix86_tune_defaulted)
2865 ix86_tune_string = "x86-64";
2866 for (i = 0; i < pta_size; i++)
2867 if (! strcmp (ix86_tune_string,
2868 processor_alias_table[i].name))
2870 ix86_schedule = processor_alias_table[i].schedule;
2871 ix86_tune = processor_alias_table[i].processor;
2874 error ("CPU you selected does not support x86-64 "
2877 /* Intel CPUs have always interpreted SSE prefetch instructions as
2878 NOPs; so, we can enable SSE prefetch instructions even when
2879 -mtune (rather than -march) points us to a processor that has them.
2880 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2881 higher processors. */
2883 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2884 x86_prefetch_sse = true;
2888 error ("bad value (%s) for %stune=%s %s",
2889 ix86_tune_string, prefix, suffix, sw);
2891 ix86_tune_mask = 1u << ix86_tune;
2892 for (i = 0; i < X86_TUNE_LAST; ++i)
2893 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2896 ix86_cost = &ix86_size_cost;
2898 ix86_cost = processor_target_table[ix86_tune].cost;
2900 /* Arrange to set up i386_stack_locals for all functions. */
2901 init_machine_status = ix86_init_machine_status;
2903 /* Validate -mregparm= value. */
2904 if (ix86_regparm_string)
2907 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2908 i = atoi (ix86_regparm_string);
2909 if (i < 0 || i > REGPARM_MAX)
2910 error ("%sregparm=%d%s is not between 0 and %d",
2911 prefix, i, suffix, REGPARM_MAX);
2916 ix86_regparm = REGPARM_MAX;
2918 /* If the user has provided any of the -malign-* options,
2919 warn and use that value only if -falign-* is not set.
2920 Remove this code in GCC 3.2 or later. */
2921 if (ix86_align_loops_string)
2923 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2924 prefix, suffix, prefix, suffix);
2925 if (align_loops == 0)
2927 i = atoi (ix86_align_loops_string);
2928 if (i < 0 || i > MAX_CODE_ALIGN)
2929 error ("%salign-loops=%d%s is not between 0 and %d",
2930 prefix, i, suffix, MAX_CODE_ALIGN);
2932 align_loops = 1 << i;
2936 if (ix86_align_jumps_string)
2938 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2939 prefix, suffix, prefix, suffix);
2940 if (align_jumps == 0)
2942 i = atoi (ix86_align_jumps_string);
2943 if (i < 0 || i > MAX_CODE_ALIGN)
2944 error ("%salign-loops=%d%s is not between 0 and %d",
2945 prefix, i, suffix, MAX_CODE_ALIGN);
2947 align_jumps = 1 << i;
2951 if (ix86_align_funcs_string)
2953 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2954 prefix, suffix, prefix, suffix);
2955 if (align_functions == 0)
2957 i = atoi (ix86_align_funcs_string);
2958 if (i < 0 || i > MAX_CODE_ALIGN)
2959 error ("%salign-loops=%d%s is not between 0 and %d",
2960 prefix, i, suffix, MAX_CODE_ALIGN);
2962 align_functions = 1 << i;
2966 /* Default align_* from the processor table. */
2967 if (align_loops == 0)
2969 align_loops = processor_target_table[ix86_tune].align_loop;
2970 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2972 if (align_jumps == 0)
2974 align_jumps = processor_target_table[ix86_tune].align_jump;
2975 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2977 if (align_functions == 0)
2979 align_functions = processor_target_table[ix86_tune].align_func;
2982 /* Validate -mbranch-cost= value, or provide default. */
2983 ix86_branch_cost = ix86_cost->branch_cost;
2984 if (ix86_branch_cost_string)
2986 i = atoi (ix86_branch_cost_string);
2988 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2990 ix86_branch_cost = i;
2992 if (ix86_section_threshold_string)
2994 i = atoi (ix86_section_threshold_string);
2996 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2998 ix86_section_threshold = i;
3001 if (ix86_tls_dialect_string)
3003 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3004 ix86_tls_dialect = TLS_DIALECT_GNU;
3005 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3006 ix86_tls_dialect = TLS_DIALECT_GNU2;
3007 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3008 ix86_tls_dialect = TLS_DIALECT_SUN;
3010 error ("bad value (%s) for %stls-dialect=%s %s",
3011 ix86_tls_dialect_string, prefix, suffix, sw);
3014 if (ix87_precision_string)
3016 i = atoi (ix87_precision_string);
3017 if (i != 32 && i != 64 && i != 80)
3018 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3023 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3025 /* Enable by default the SSE and MMX builtins. Do allow the user to
3026 explicitly disable any of these. In particular, disabling SSE and
3027 MMX for kernel code is extremely useful. */
3028 if (!ix86_arch_specified)
3030 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3031 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3034 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3038 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3040 if (!ix86_arch_specified)
3042 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3044 /* i386 ABI does not specify red zone. It still makes sense to use it
3045 when programmer takes care to stack from being destroyed. */
3046 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3047 target_flags |= MASK_NO_RED_ZONE;
3050 /* Keep nonleaf frame pointers. */
3051 if (flag_omit_frame_pointer)
3052 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3053 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3054 flag_omit_frame_pointer = 1;
3056 /* If we're doing fast math, we don't care about comparison order
3057 wrt NaNs. This lets us use a shorter comparison sequence. */
3058 if (flag_finite_math_only)
3059 target_flags &= ~MASK_IEEE_FP;
3061 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3062 since the insns won't need emulation. */
3063 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3064 target_flags &= ~MASK_NO_FANCY_MATH_387;
3066 /* Likewise, if the target doesn't have a 387, or we've specified
3067 software floating point, don't use 387 inline intrinsics. */
3069 target_flags |= MASK_NO_FANCY_MATH_387;
3071 /* Turn on MMX builtins for -msse. */
3074 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3075 x86_prefetch_sse = true;
3078 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3079 if (TARGET_SSE4_2 || TARGET_ABM)
3080 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3082 /* Validate -mpreferred-stack-boundary= value or default it to
3083 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3084 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3085 if (ix86_preferred_stack_boundary_string)
3087 i = atoi (ix86_preferred_stack_boundary_string);
3088 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3089 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3090 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3092 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3095 /* Set the default value for -mstackrealign. */
3096 if (ix86_force_align_arg_pointer == -1)
3097 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3099 /* Validate -mincoming-stack-boundary= value or default it to
3100 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3101 if (ix86_force_align_arg_pointer)
3102 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3104 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3105 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3106 if (ix86_incoming_stack_boundary_string)
3108 i = atoi (ix86_incoming_stack_boundary_string);
3109 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3110 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3111 i, TARGET_64BIT ? 4 : 2);
3114 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3115 ix86_incoming_stack_boundary
3116 = ix86_user_incoming_stack_boundary;
3120 /* Accept -msseregparm only if at least SSE support is enabled. */
3121 if (TARGET_SSEREGPARM
3123 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3125 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3126 if (ix86_fpmath_string != 0)
3128 if (! strcmp (ix86_fpmath_string, "387"))
3129 ix86_fpmath = FPMATH_387;
3130 else if (! strcmp (ix86_fpmath_string, "sse"))
3134 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3135 ix86_fpmath = FPMATH_387;
3138 ix86_fpmath = FPMATH_SSE;
3140 else if (! strcmp (ix86_fpmath_string, "387,sse")
3141 || ! strcmp (ix86_fpmath_string, "387+sse")
3142 || ! strcmp (ix86_fpmath_string, "sse,387")
3143 || ! strcmp (ix86_fpmath_string, "sse+387")
3144 || ! strcmp (ix86_fpmath_string, "both"))
3148 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3149 ix86_fpmath = FPMATH_387;
3151 else if (!TARGET_80387)
3153 warning (0, "387 instruction set disabled, using SSE arithmetics");
3154 ix86_fpmath = FPMATH_SSE;
3157 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3160 error ("bad value (%s) for %sfpmath=%s %s",
3161 ix86_fpmath_string, prefix, suffix, sw);
3164 /* If the i387 is disabled, then do not return values in it. */
3166 target_flags &= ~MASK_FLOAT_RETURNS;
3168 /* Use external vectorized library in vectorizing intrinsics. */
3169 if (ix86_veclibabi_string)
3171 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3172 ix86_veclib_handler = ix86_veclibabi_svml;
3173 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3174 ix86_veclib_handler = ix86_veclibabi_acml;
3176 error ("unknown vectorization library ABI type (%s) for "
3177 "%sveclibabi=%s %s", ix86_veclibabi_string,
3178 prefix, suffix, sw);
3181 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3182 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3184 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3186 /* ??? Unwind info is not correct around the CFG unless either a frame
3187 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3188 unwind info generation to be aware of the CFG and propagating states
3190 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3191 || flag_exceptions || flag_non_call_exceptions)
3192 && flag_omit_frame_pointer
3193 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3195 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3196 warning (0, "unwind tables currently require either a frame pointer "
3197 "or %saccumulate-outgoing-args%s for correctness",
3199 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3202 /* If stack probes are required, the space used for large function
3203 arguments on the stack must also be probed, so enable
3204 -maccumulate-outgoing-args so this happens in the prologue. */
3205 if (TARGET_STACK_PROBE
3206 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3208 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3209 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3210 "for correctness", prefix, suffix);
3211 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3214 /* For sane SSE instruction set generation we need fcomi instruction.
3215 It is safe to enable all CMOVE instructions. */
3219 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3222 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3223 p = strchr (internal_label_prefix, 'X');
3224 internal_label_prefix_len = p - internal_label_prefix;
3228 /* When scheduling description is not available, disable scheduler pass
3229 so it won't slow down the compilation and make x87 code slower. */
3230 if (!TARGET_SCHEDULE)
3231 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3233 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3234 set_param_value ("simultaneous-prefetches",
3235 ix86_cost->simultaneous_prefetches);
3236 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3237 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3238 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3239 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3240 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3241 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3243 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3244 can be optimized to ap = __builtin_next_arg (0). */
3246 targetm.expand_builtin_va_start = NULL;
3250 ix86_gen_leave = gen_leave_rex64;
3251 ix86_gen_pop1 = gen_popdi1;
3252 ix86_gen_add3 = gen_adddi3;
3253 ix86_gen_sub3 = gen_subdi3;
3254 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3255 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3256 ix86_gen_monitor = gen_sse3_monitor64;
3257 ix86_gen_andsp = gen_anddi3;
3261 ix86_gen_leave = gen_leave;
3262 ix86_gen_pop1 = gen_popsi1;
3263 ix86_gen_add3 = gen_addsi3;
3264 ix86_gen_sub3 = gen_subsi3;
3265 ix86_gen_sub3_carry = gen_subsi3_carry;
3266 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3267 ix86_gen_monitor = gen_sse3_monitor;
3268 ix86_gen_andsp = gen_andsi3;
3272 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3274 target_flags |= MASK_CLD & ~target_flags_explicit;
3277 /* Save the initial options in case the user does function specific options */
3279 target_option_default_node = target_option_current_node
3280 = build_target_option_node ();
3283 /* Save the current options */
3286 ix86_function_specific_save (struct cl_target_option *ptr)
/* Save the current global target-option state into *PTR so it can be
   restored later by ix86_function_specific_restore.  The asserts below
   guard the narrowing stores: the cl_target_option fields are
   presumably single bytes -- TODO confirm against the generated
   cl_target_option definition.  */
3288 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3289 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3290 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3291 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3292 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Straight field-by-field copy of the globals into the save area.  */
3294 ptr->arch = ix86_arch;
3295 ptr->schedule = ix86_schedule;
3296 ptr->tune = ix86_tune;
3297 ptr->fpmath = ix86_fpmath;
3298 ptr->branch_cost = ix86_branch_cost;
3299 ptr->tune_defaulted = ix86_tune_defaulted;
3300 ptr->arch_specified = ix86_arch_specified;
3301 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3302 ptr->target_flags_explicit = target_flags_explicit;
3305 /* Restore the current options */
3308 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Restore the global target-option state from *PTR (the inverse of
   ix86_function_specific_save).  Remember the previous arch/tune so we
   only rebuild the feature bitmaps when they actually changed.  */
3310 enum processor_type old_tune = ix86_tune;
3311 enum processor_type old_arch = ix86_arch;
3312 unsigned int ix86_arch_mask, ix86_tune_mask;
/* Field-by-field copy from the save area back into the globals.  */
3315 ix86_arch = ptr->arch;
3316 ix86_schedule = ptr->schedule;
3317 ix86_tune = ptr->tune;
3318 ix86_fpmath = ptr->fpmath;
3319 ix86_branch_cost = ptr->branch_cost;
3320 ix86_tune_defaulted = ptr->tune_defaulted;
3321 ix86_arch_specified = ptr->arch_specified;
3322 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3323 target_flags_explicit = ptr->target_flags_explicit;
3325 /* Recreate the arch feature tests if the arch changed.  Each feature
   entry becomes a 0/1 flag derived from the per-arch bitmask.  */
3326 if (old_arch != ix86_arch)
3328 ix86_arch_mask = 1u << ix86_arch;
3329 for (i = 0; i < X86_ARCH_LAST; ++i)
3330 ix86_arch_features[i]
3331 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3334 /* Recreate the tune optimization tests, same scheme as above.  */
3335 if (old_tune != ix86_tune)
3337 ix86_tune_mask = 1u << ix86_tune;
3338 for (i = 0; i < X86_TUNE_LAST; ++i)
3339 ix86_tune_features[i]
3340 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3344 /* Print the current options */
3347 ix86_function_specific_print (FILE *file, int indent,
3348 struct cl_target_option *ptr)
/* Debug dump of the saved option state in *PTR to FILE, each line
   left-padded by INDENT spaces (the "%*s" idiom below).  */
3351 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3352 NULL, NULL, NULL, false);
/* Print arch/tune both numerically and, when the index is in range of
   the cpu_names table, symbolically.  */
3354 fprintf (file, "%*sarch = %d (%s)\n",
3357 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3358 ? cpu_names[ptr->arch]
3361 fprintf (file, "%*stune = %d (%s)\n",
3364 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3365 ? cpu_names[ptr->tune]
/* fpmath is a bitmask; append ", 387"/", sse" for each enabled unit.  */
3368 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3369 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3370 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3371 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
/* ix86_target_string returned a heap string; print and release it.  */
3375 fprintf (file, "%*s%s\n", indent, "", target_string);
3376 free (target_string);
3381 /* Inner function to process the attribute((target(...))), take an argument and
3382 set the current options from the argument. If we have a list, recursively go
3386 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Parse one attribute((target("..."))) argument (or a TREE_LIST of
   them), applying each recognized option to the global option state.
   P_STRINGS collects the string-valued options (arch=/tune=/fpmath=)
   as freshly xstrdup'ed copies owned by the caller.  */
/* Table-entry builders: S is the option name, O the OPT_* id; YES/NO
   variants also carry the target_flags mask M to set or clear.  */
3391 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3392 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3393 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3394 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3409 enum ix86_opt_type type;
/* ISA options routed through ix86_handle_option.  */
3414 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3415 IX86_ATTR_ISA ("abm", OPT_mabm),
3416 IX86_ATTR_ISA ("aes", OPT_maes),
3417 IX86_ATTR_ISA ("avx", OPT_mavx),
3418 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3419 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3420 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3421 IX86_ATTR_ISA ("sse", OPT_msse),
3422 IX86_ATTR_ISA ("sse2", OPT_msse2),
3423 IX86_ATTR_ISA ("sse3", OPT_msse3),
3424 IX86_ATTR_ISA ("sse4", OPT_msse4),
3425 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3426 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3427 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3428 IX86_ATTR_ISA ("sse5", OPT_msse5),
3429 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3431 /* string options */
3432 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3433 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3434 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Boolean flag options mapped onto target_flags bits.  */
3437 IX86_ATTR_YES ("cld",
3441 IX86_ATTR_NO ("fancy-math-387",
3442 OPT_mfancy_math_387,
3443 MASK_NO_FANCY_MATH_387),
3445 IX86_ATTR_NO ("fused-madd",
3447 MASK_NO_FUSED_MADD),
3449 IX86_ATTR_YES ("ieee-fp",
3453 IX86_ATTR_YES ("inline-all-stringops",
3454 OPT_minline_all_stringops,
3455 MASK_INLINE_ALL_STRINGOPS),
3457 IX86_ATTR_YES ("inline-stringops-dynamically",
3458 OPT_minline_stringops_dynamically,
3459 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3461 IX86_ATTR_NO ("align-stringops",
3462 OPT_mno_align_stringops,
3463 MASK_NO_ALIGN_STRINGOPS),
3465 IX86_ATTR_YES ("recip",
3471 /* If this is a list, recurse to get the options. */
3472 if (TREE_CODE (args) == TREE_LIST)
3476 for (; args; args = TREE_CHAIN (args))
3477 if (TREE_VALUE (args)
3478 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
/* Anything other than a list or a string constant is malformed.  */
3484 else if (TREE_CODE (args) != STRING_CST)
3487 /* Handle multiple arguments separated by commas. */
3488 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3490 while (next_optstr && *next_optstr != '\0')
3492 char *p = next_optstr;
3494 char *comma = strchr (next_optstr, ',');
3495 const char *opt_string;
3496 size_t len, opt_len;
3501 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the segment before the comma and advance past it.  */
3507 len = comma - next_optstr;
3508 next_optstr = comma + 1;
3516 /* Recognize no-xxx. */
3517 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3526 /* Find the option.  String options ("arch=foo") match on prefix
   (len > opt_len); all others require an exact-length match.  */
3529 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3531 type = attrs[i].type;
3532 opt_len = attrs[i].len;
3533 if (ch == attrs[i].string[0]
3534 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3535 && memcmp (p, attrs[i].string, opt_len) == 0)
3538 mask = attrs[i].mask;
3539 opt_string = attrs[i].string;
3544 /* Process the option according to its table type.  */
3547 error ("attribute(target(\"%s\")) is unknown", orig_p);
3551 else if (type == ix86_opt_isa)
3552 ix86_handle_option (opt, p, opt_set_p);
3554 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* ix86_opt_no entries store an inverted mask, so flip the sense.  */
3556 if (type == ix86_opt_no)
3557 opt_set_p = !opt_set_p;
3560 target_flags |= mask;
3562 target_flags &= ~mask;
3565 else if (type == ix86_opt_str)
/* String options may appear only once per attribute.  */
3569 error ("option(\"%s\") was already specified", opt_string);
/* Caller owns (and must free) the duplicated value.  */
3573 p_strings[opt] = xstrdup (p + opt_len);
3583 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3586 ix86_valid_target_attribute_tree (tree args)
/* Build a TARGET_OPTION_NODE for the attribute arguments in ARGS, or
   NULL on error.  Saves the current string options so they can be put
   back after the temporary override_options run below.  */
3588 const char *orig_arch_string = ix86_arch_string;
3589 const char *orig_tune_string = ix86_tune_string;
3590 const char *orig_fpmath_string = ix86_fpmath_string;
3591 int orig_tune_defaulted = ix86_tune_defaulted;
3592 int orig_arch_specified = ix86_arch_specified;
3593 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3596 struct cl_target_option *def
3597 = TREE_TARGET_OPTION (target_option_default_node);
3599 /* Process each of the options on the chain. */
3600 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3603 /* If the changed options are different from the default, rerun override_options,
3604 and then save the options away. The string options are attribute options,
3605 and will be undone when we copy the save structure. */
3606 if (ix86_isa_flags != def->ix86_isa_flags
3607 || target_flags != def->target_flags
3608 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3609 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3610 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3612 /* If we are using the default tune= or arch=, undo the string assigned,
3613 and use the default. */
3614 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3615 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3616 else if (!orig_arch_specified)
3617 ix86_arch_string = NULL;
3619 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3620 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3621 else if (orig_tune_defaulted)
3622 ix86_tune_string = NULL;
3624 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3625 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3626 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3627 else if (!TARGET_64BIT && TARGET_SSE)
3628 ix86_fpmath_string = "sse,387";
3630 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3631 override_options (false);
3633 /* Add any builtin functions with the new isa if any. */
3634 ix86_add_new_builtins (ix86_isa_flags);
3636 /* Save the current options unless we are validating options for
3638 t = build_target_option_node ();
/* Put the saved global string options back; the attribute copies were
   only needed for the override_options call above.  */
3640 ix86_arch_string = orig_arch_string;
3641 ix86_tune_string = orig_tune_string;
3642 ix86_fpmath_string = orig_fpmath_string;
3644 /* Free up memory allocated to hold the strings */
3645 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3646 if (option_strings[i])
3647 free (option_strings[i]);
3653 /* Hook to validate attribute((target("string"))). */
3656 ix86_valid_target_attribute_p (tree fndecl,
3657 tree ARG_UNUSED (name),
3659 int ARG_UNUSED (flags))
/* Hook to validate attribute((target("string"))) on FNDECL.  Applies
   the options temporarily, records the resulting target/optimization
   nodes on the decl, then restores the previous global state.  */
3661 struct cl_target_option cur_target;
3663 tree old_optimize = build_optimization_node ();
3664 tree new_target, new_optimize;
3665 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3667 /* If the function changed the optimization levels as well as setting target
3668 options, start with the optimizations specified. */
3669 if (func_optimize && func_optimize != old_optimize)
3670 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3672 /* The target attributes may also change some optimization flags, so update
3673 the optimization options if necessary. */
3674 cl_target_option_save (&cur_target);
3675 new_target = ix86_valid_target_attribute_tree (args);
3676 new_optimize = build_optimization_node ();
/* Attach the computed option nodes to the function declaration.  */
3683 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3685 if (old_optimize != new_optimize)
3686 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made while validating.  */
3689 cl_target_option_restore (&cur_target);
3691 if (old_optimize != new_optimize)
3692 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3698 /* Hook to determine if one function can safely inline another. */
3701 ix86_can_inline_p (tree caller, tree callee)
/* Hook: decide whether CALLEE may be inlined into CALLER based on
   their per-function target options.  */
3704 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3705 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3707 /* If callee has no option attributes, then it is ok to inline. */
3711 /* If caller has no option attributes, but callee does then it is not ok to
3713 else if (!caller_tree)
3718 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3719 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3721 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3722 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3724 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3725 != callee_opts->ix86_isa_flags)
3728 /* See if we have the same non-isa options. */
3729 else if (caller_opts->target_flags != callee_opts->target_flags)
3732 /* See if arch, tune, etc. are the same. */
3733 else if (caller_opts->arch != callee_opts->arch)
3736 else if (caller_opts->tune != callee_opts->tune)
3739 else if (caller_opts->fpmath != callee_opts->fpmath)
3742 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3753 /* Remember the last target of ix86_set_current_function. */
3754 static GTY(()) tree ix86_previous_fndecl;
3756 /* Establish appropriate back-end context for processing the function
3757 FNDECL. The argument might be NULL to indicate processing at top
3758 level, outside of any function scope. */
3760 ix86_set_current_function (tree fndecl)
3762 /* Only change the context if the function changes. This hook is called
3763 several times in the course of compiling a function, and we don't want to
3764 slow things down too much or call target_reinit when it isn't safe. */
3765 if (fndecl && fndecl != ix86_previous_fndecl)
/* Compare the previous function's target-option node with the new
   function's; identical nodes mean nothing needs restoring.  */
3767 tree old_tree = (ix86_previous_fndecl
3768 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3771 tree new_tree = (fndecl
3772 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3775 ix86_previous_fndecl = fndecl;
3776 if (old_tree == new_tree)
/* New function carries its own target options: switch to them.  */
3781 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Otherwise fall back to the current global defaults.  */
3787 struct cl_target_option *def
3788 = TREE_TARGET_OPTION (target_option_current_node);
3790 cl_target_option_restore (def);
3797 /* Return true if this goes in large data/bss. */
3800 ix86_in_large_data_p (tree exp)
/* Return true if EXP should be placed in the large data/bss sections
   (.ldata/.lbss) -- only relevant for the x86-64 medium code models.  */
3802 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3805 /* Functions are never large data. */
3806 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute naming .ldata/.lbss forces the large
   placement regardless of size.  */
3809 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3811 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3812 if (strcmp (section, ".ldata") == 0
3813 || strcmp (section, ".lbss") == 0)
3819 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3821 /* If this is an incomplete type with size 0, then we can't put it
3822 in data because it might be too big when completed. */
3823 if (!size || size > ix86_section_threshold)
3830 /* Switch to the appropriate section for output of DECL.
3831 DECL is either a `VAR_DECL' node or a constant of some sort.
3832 RELOC indicates whether forming the initial value of DECL requires
3833 link-time relocations. */
3835 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3839 x86_64_elf_select_section (tree decl, int reloc,
3840 unsigned HOST_WIDE_INT align)
/* Section-selection hook: for medium-model large data, map the decl's
   category onto the matching .l* section; everything else defers to
   the generic ELF logic at the bottom.  */
3842 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3843 && ix86_in_large_data_p (decl))
3845 const char *sname = NULL;
3846 unsigned int flags = SECTION_WRITE;
3847 switch (categorize_decl_for_section (decl, reloc))
3852 case SECCAT_DATA_REL:
3853 sname = ".ldata.rel";
3855 case SECCAT_DATA_REL_LOCAL:
3856 sname = ".ldata.rel.local";
3858 case SECCAT_DATA_REL_RO:
3859 sname = ".ldata.rel.ro";
3861 case SECCAT_DATA_REL_RO_LOCAL:
3862 sname = ".ldata.rel.ro.local";
3866 flags |= SECTION_BSS;
3869 case SECCAT_RODATA_MERGE_STR:
3870 case SECCAT_RODATA_MERGE_STR_INIT:
3871 case SECCAT_RODATA_MERGE_CONST:
3875 case SECCAT_SRODATA:
3882 /* We don't split these for medium model. Place them into
3883 default sections and hope for best. */
3885 case SECCAT_EMUTLS_VAR:
3886 case SECCAT_EMUTLS_TMPL:
3891 /* We might get called with string constants, but get_named_section
3892 doesn't like them as they are not DECLs. Also, we need to set
3893 flags in that case. */
3895 return get_section (sname, flags, NULL);
3896 return get_named_section (decl, sname, reloc);
/* Not large data: use the default ELF section selection.  */
3899 return default_elf_select_section (decl, reloc, align);
3902 /* Build up a unique section name, expressed as a
3903 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3904 RELOC indicates whether the initial value of EXP requires
3905 link-time relocations. */
3907 static void ATTRIBUTE_UNUSED
3908 x86_64_elf_unique_section (tree decl, int reloc)
/* Unique-section hook: for medium-model large data, build a per-decl
   section name with an .l* prefix (and .gnu.linkonce when COMDAT
   groups are unavailable); otherwise fall through to the default.  */
3910 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3911 && ix86_in_large_data_p (decl))
3913 const char *prefix = NULL;
3914 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3915 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3917 switch (categorize_decl_for_section (decl, reloc))
3920 case SECCAT_DATA_REL:
3921 case SECCAT_DATA_REL_LOCAL:
3922 case SECCAT_DATA_REL_RO:
3923 case SECCAT_DATA_REL_RO_LOCAL:
3924 prefix = one_only ? ".ld" : ".ldata";
3927 prefix = one_only ? ".lb" : ".lbss";
3930 case SECCAT_RODATA_MERGE_STR:
3931 case SECCAT_RODATA_MERGE_STR_INIT:
3932 case SECCAT_RODATA_MERGE_CONST:
3933 prefix = one_only ? ".lr" : ".lrodata";
3935 case SECCAT_SRODATA:
3942 /* We don't split these for medium model. Place them into
3943 default sections and hope for best. */
3945 case SECCAT_EMUTLS_VAR:
3946 prefix = targetm.emutls.var_section;
3948 case SECCAT_EMUTLS_TMPL:
3949 prefix = targetm.emutls.tmpl_section;
3954 const char *name, *linkonce;
/* Use the decl's assembler name, minus any target encoding.  */
3957 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3958 name = targetm.strip_name_encoding (name);
3960 /* If we're using one_only, then there needs to be a .gnu.linkonce
3961 prefix to the section name. */
3962 linkonce = one_only ? ".gnu.linkonce" : "";
3964 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3966 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Not large data: let the generic code pick a unique section.  */
3970 default_unique_section (decl, reloc);
3973 #ifdef COMMON_ASM_OP
3974 /* This says how to output assembler code to declare an
3975 uninitialized external linkage data object.
3977 For medium model x86-64 we need to use .largecomm opcode for
3980 x86_elf_aligned_common (FILE *file,
3981 const char *name, unsigned HOST_WIDE_INT size,
/* Emit a common symbol: medium-model objects above the large-data
   threshold use the .largecomm directive, everything else the normal
   COMMON_ASM_OP.  */
3984 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3985 && size > (unsigned int)ix86_section_threshold)
3986 fprintf (file, ".largecomm\t");
3988 fprintf (file, "%s", COMMON_ASM_OP);
3989 assemble_name (file, name);
/* Directive operands: size in bytes, alignment in bytes.  */
3990 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3991 size, align / BITS_PER_UNIT);
3995 /* Utility function for targets to use in implementing
3996 ASM_OUTPUT_ALIGNED_BSS. */
3999 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4000 const char *name, unsigned HOST_WIDE_INT size,
/* ASM_OUTPUT_ALIGNED_BSS helper: place large medium-model objects in
   .lbss, others in the regular bss section, then emit alignment,
   label, and space for the object.  */
4003 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4004 && size > (unsigned int)ix86_section_threshold)
4005 switch_to_section (get_named_section (decl, ".lbss", 0));
4007 switch_to_section (bss_section);
4008 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4009 #ifdef ASM_DECLARE_OBJECT_NAME
4010 last_assemble_variable_decl = decl;
4011 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4013 /* Standard thing is just output label for the object. */
4014 ASM_OUTPUT_LABEL (file, name);
4015 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the symbol has nonzero extent.  */
4016 ASM_OUTPUT_SKIP (file, size ? size : 1);
4020 optimization_options (int level, int size ATTRIBUTE_UNUSED)
/* Set i386-specific optimization defaults for optimization LEVEL,
   before the command line is fully processed.  */
4022 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4023 make the problem with not enough registers even worse. */
4024 #ifdef INSN_SCHEDULING
4026 flag_schedule_insns = 0;
4030 /* The Darwin libraries never set errno, so we might as well
4031 avoid calling them when that's the only reason we would. */
4032 flag_errno_math = 0;
4034 /* The default values of these switches depend on the TARGET_64BIT
4035 that is not known at this moment. Mark these values with 2 and
4036 let the user override these. In case there is no command line option
4037 specifying them, we will set the defaults in override_options. */
4039 flag_omit_frame_pointer = 2;
4040 flag_pcc_struct_return = 2;
4041 flag_asynchronous_unwind_tables = 2;
4042 flag_vect_cost_model = 1;
4043 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4044 SUBTARGET_OPTIMIZATION_OPTIONS;
4048 /* Decide whether we can make a sibling call to a function. DECL is the
4049 declaration of the function being targeted by the call and EXP is the
4050 CALL_EXPR representing the call. */
/* Structured as a sequence of disqualifying checks; reaching the end
   means the sibcall is allowed.  NOTE(review): the excerpt elides the
   return type, local declarations, braces and the actual return
   statements of each branch.  */
4053 ix86_function_ok_for_sibcall (tree decl, tree exp)
4058 /* If we are generating position-independent code, we cannot sibcall
4059 optimize any indirect call, or a direct call to a global function,
4060 as the PLT requires %ebx be live. */
4061 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Strip the pointer type off CALL_EXPR_FN to get the callee's
   FUNCTION_TYPE.  */
4068 func = TREE_TYPE (CALL_EXPR_FN (exp));
4069 if (POINTER_TYPE_P (func))
4070 func = TREE_TYPE (func);
4073 /* Check that the return value locations are the same. Like
4074 if we are returning floats on the 80387 register stack, we cannot
4075 make a sibcall from a function that doesn't return a float to a
4076 function that does or, conversely, from a function that does return
4077 a float to a function that doesn't; the necessary stack adjustment
4078 would not be executed. This is also the place we notice
4079 differences in the return value ABI. Note that it is ok for one
4080 of the functions to have void return type as long as the return
4081 value of the other is passed in a register. */
4082 a = ix86_function_value (TREE_TYPE (exp), func, false);
4083 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4085 if (STACK_REG_P (a) || STACK_REG_P (b))
4087 if (!rtx_equal_p (a, b))
4090 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4092 else if (!rtx_equal_p (a, b))
4095 /* If this call is indirect, we'll need to be able to use a call-clobbered
4096 register for the address of the target function. Make sure that all
4097 such registers are not used for passing parameters. */
4098 if (!decl && !TARGET_64BIT)
4102 /* We're looking at the CALL_EXPR, we need the type of the function. */
4103 type = CALL_EXPR_FN (exp); /* pointer expression */
4104 type = TREE_TYPE (type); /* pointer type */
4105 type = TREE_TYPE (type); /* function type */
/* regparm >= 3 means EAX, EDX and ECX are all argument registers,
   leaving no call-clobbered register free for the call address.  */
4107 if (ix86_function_regparm (type, NULL) >= 3)
4109 /* ??? Need to count the actual number of registers to be used,
4110 not the possible number of registers. Fix later. */
4115 /* Dllimport'd functions are also called indirectly. */
4116 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4118 && decl && DECL_DLLIMPORT_P (decl)
4119 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4122 /* If we need to align the outgoing stack, then sibcalling would
4123 unalign the stack, which may break the called function. */
4124 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4127 /* Otherwise okay. That also includes certain types of indirect calls. */
4131 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4132 calling convention attributes;
4133 arguments as in struct attribute_spec.handler. */
/* Validates attribute placement and mutual compatibility; sets
   *no_add_attrs on rejection so the attribute is dropped.
   NOTE(review): several lines (function header start, braces, the
   64-bit early-out around line 4182, returns) are elided in this
   excerpt.  */
4136 ix86_handle_cconv_attribute (tree *node, tree name,
4138 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or decls that
   carry one).  */
4141 if (TREE_CODE (*node) != FUNCTION_TYPE
4142 && TREE_CODE (*node) != METHOD_TYPE
4143 && TREE_CODE (*node) != FIELD_DECL
4144 && TREE_CODE (*node) != TYPE_DECL)
4146 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4147 IDENTIFIER_POINTER (name));
4148 *no_add_attrs = true;
4152 /* Can combine regparm with all attributes but fastcall. */
4153 if (is_attribute_p ("regparm", name))
4157 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4159 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one argument: an integer register count <= REGPARM_MAX.  */
4162 cst = TREE_VALUE (args);
4163 if (TREE_CODE (cst) != INTEGER_CST)
4165 warning (OPT_Wattributes,
4166 "%qs attribute requires an integer constant argument",
4167 IDENTIFIER_POINTER (name));
4168 *no_add_attrs = true;
4170 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4172 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4173 IDENTIFIER_POINTER (name), REGPARM_MAX);
4174 *no_add_attrs = true;
4182 /* Do not warn when emulating the MS ABI. */
4183 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4184 warning (OPT_Wattributes, "%qs attribute ignored",
4185 IDENTIFIER_POINTER (name));
4186 *no_add_attrs = true;
4190 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4191 if (is_attribute_p ("fastcall", name))
4193 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4195 error ("fastcall and cdecl attributes are not compatible");
4197 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4199 error ("fastcall and stdcall attributes are not compatible");
4201 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4203 error ("fastcall and regparm attributes are not compatible");
4207 /* Can combine stdcall with fastcall (redundant), regparm and
4209 else if (is_attribute_p ("stdcall", name))
4211 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4213 error ("stdcall and cdecl attributes are not compatible");
4215 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4217 error ("stdcall and fastcall attributes are not compatible");
4221 /* Can combine cdecl with regparm and sseregparm. */
4222 else if (is_attribute_p ("cdecl", name))
4224 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4226 error ("stdcall and cdecl attributes are not compatible");
4228 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4230 error ("fastcall and cdecl attributes are not compatible");
4234 /* Can combine sseregparm with all attributes. */
4239 /* Return 0 if the attributes for two types are incompatible, 1 if they
4240 are compatible, and 2 if they are nearly compatible (which causes a
4241 warning to be generated). */
/* Each check compares presence (not value) of an attribute on the two
   types via the `!lookup != !lookup` idiom; regparm counts are compared
   by value.  NOTE(review): the `return` keywords of the failure paths
   are elided in this excerpt.  */
4244 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4246 /* Check for mismatch of non-default calling convention. */
4247 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Non-function types carry no calling-convention attributes.  */
4249 if (TREE_CODE (type1) != FUNCTION_TYPE
4250 && TREE_CODE (type1) != METHOD_TYPE)
4253 /* Check for mismatched fastcall/regparm types. */
4254 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4255 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4256 || (ix86_function_regparm (type1, NULL)
4257 != ix86_function_regparm (type2, NULL)))
4260 /* Check for mismatched sseregparm types. */
4261 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4262 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4265 /* Check for mismatched return types (cdecl vs stdcall). */
4266 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4267 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4273 /* Return the regparm value for a function with the indicated TYPE and DECL.
4274 DECL may be NULL when calling function indirectly
4275 or considering a libcall. */
/* Returns the number of integer registers available for argument
   passing: the explicit regparm attribute value, 2 for fastcall, or a
   computed value for 64-bit and for local (static, non-address-taken)
   32-bit functions.  NOTE(review): this excerpt elides lines (return
   type, braces, some conditions such as the TARGET_64BIT guard implied
   before line 4287) — comments describe only the visible code.  */
4278 ix86_function_regparm (const_tree type, const_tree decl)
4281 int regparm = ix86_regparm;
/* Static so the nested-function diagnostic is emitted only once.  */
4283 static bool error_issued;
4287 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4289 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
4292 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4296 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4298 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4300 /* We can't use regparm(3) for nested functions because
4301 these pass static chain pointer in %ecx register. */
4302 if (!error_issued && regparm == 3
4303 && decl_function_context (decl)
4304 && !DECL_NO_STATIC_CHAIN (decl))
4306 error ("nested functions are limited to 2 register parameters");
4307 error_issued = true;
4315 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4318 /* Use register calling convention for local functions when possible. */
4319 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4322 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4323 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4326 int local_regparm, globals = 0, regno;
4329 /* Make sure no regparm register is taken by a
4330 fixed register variable. */
4331 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4332 if (fixed_regs[local_regparm])
4335 /* We can't use regparm(3) for nested functions as these use
4336 static chain pointer in third argument. */
4337 if (local_regparm == 3
4338 && decl_function_context (decl)
4339 && !DECL_NO_STATIC_CHAIN (decl))
4342 /* If the function realigns its stackpointer, the prologue will
4343 clobber %ecx. If we've already generated code for the callee,
4344 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4345 scanning the attributes for the self-realigning property. */
4346 f = DECL_STRUCT_FUNCTION (decl);
4347 /* Since current internal arg pointer won't conflict with
4348 parameter passing regs, so no need to change stack
4349 realignment and adjust regparm number.
4351 Each fixed register usage increases register pressure,
4352 so less registers should be used for argument passing.
4353 This functionality can be overriden by an explicit
4355 for (regno = 0; regno <= DI_REG; regno++)
4356 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of fixed (global
   register variable) registers found above.  */
4360 = globals < local_regparm ? local_regparm - globals : 0;
4362 if (local_regparm > regparm)
4363 regparm = local_regparm;
4370 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4371 DFmode (2) arguments in SSE registers for a function with the
4372 indicated TYPE and DECL. DECL may be NULL when calling function
4373 indirectly or considering a libcall. Otherwise return 0. */
/* 32-bit only (asserted below).  WARN controls whether the SSE-disabled
   diagnostics at lines 4390/4393 are emitted — TODO confirm; the guard
   itself is elided in this excerpt.  */
4376 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4378 gcc_assert (!TARGET_64BIT);
4380 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4381 by the sseregparm attribute. */
4382 if (TARGET_SSEREGPARM
4383 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4390 error ("Calling %qD with attribute sseregparm without "
4391 "SSE/SSE2 enabled", decl);
4393 error ("Calling %qT with attribute sseregparm without "
4394 "SSE/SSE2 enabled", type);
4402 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4403 (and DFmode for SSE2) arguments in SSE registers. */
4404 if (decl && TARGET_SSE_MATH && !profile_flag)
4406 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4407 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4409 return TARGET_SSE2 ? 2 : 1;
4415 /* Return true if EAX is live at the start of the function. Used by
4416 ix86_expand_prologue to determine if we need special help before
4417 calling allocate_stack_worker. */
4420 ix86_eax_live_at_start_p (void)
4422 /* Cheat. Don't bother working forward from ix86_function_regparm
4423 to the function type to whether an actual argument is located in
4424 eax. Instead just look at cfg info, which is still close enough
4425 to correct at this point. This gives false positives for broken
4426 functions that might use uninitialized data that happens to be
4427 allocated in eax, but who cares? */
/* Register 0 is EAX; test its membership in the entry block's
   live-out set from the dataflow framework.  */
4428 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4431 /* Value is the number of bytes of arguments automatically
4432 popped when returning from a subroutine call.
4433 FUNDECL is the declaration node of the function (as a tree),
4434 FUNTYPE is the data type of the function (as a tree),
4435 or for a library call it is an identifier node for the subroutine name.
4436 SIZE is the number of bytes of arguments passed on the stack.
4438 On the 80386, the RTD insn may be used to pop them if the number
4439 of args is fixed, but if the number is variable then the caller
4440 must pop them all. RTD can't be used for library calls now
4441 because the library is compiled with the Unix compiler.
4442 Use of RTD is a selectable option, since it is incompatible with
4443 standard Unix calling sequences. If the option is not selected,
4444 the caller must always pop the args.
4446 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): the return statements (e.g. `return size;` for the
   pop-everything cases and `return 0;` at the end) are elided in this
   excerpt.  */
4449 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4453 /* None of the 64-bit ABIs pop arguments. */
/* RTD only applies to real declarations, not libcall identifier nodes.  */
4457 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4459 /* Cdecl functions override -mrtd, and never pop the stack. */
4460 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4462 /* Stdcall and fastcall functions will pop the stack if not
4464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4465 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* -mrtd pops only for non-variadic functions.  */
4468 if (rtd && ! stdarg_p (funtype))
4472 /* Lose any fake structure return argument if it is passed on the stack. */
4473 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4474 && !KEEP_AGGREGATE_RETURN_POINTER)
4476 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden struct-return pointer occupies one word on the stack.  */
4478 return GET_MODE_SIZE (Pmode);
4484 /* Argument support functions. */
4486 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): the branch structure (the surrounding if/else for the
   32-bit vs 64-bit cases and the final returns) is partly elided in
   this excerpt.  */
4488 ix86_function_arg_regno_p (int regno)
4491 const int *parm_regs;
4496 return (regno < REGPARM_MAX
4497 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
/* 32-bit case: the low integer regs plus, when enabled, the first
   MMX_REGPARM_MAX MMX and SSE_REGPARM_MAX SSE registers.  */
4499 return (regno < REGPARM_MAX
4500 || (TARGET_MMX && MMX_REGNO_P (regno)
4501 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4502 || (TARGET_SSE && SSE_REGNO_P (regno)
4503 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4508 if (SSE_REGNO_P (regno) && TARGET_SSE)
4513 if (TARGET_SSE && SSE_REGNO_P (regno)
4514 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4518 /* TODO: The function should depend on current function ABI but
4519 builtins.c would need updating then. Therefore we use the
4522 /* RAX is used as hidden argument to va_arg functions. */
4523 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
/* 64-bit: look REGNO up in the integer parameter register table of
   the default ABI (MS or SysV).  */
4526 if (DEFAULT_ABI == MS_ABI)
4527 parm_regs = x86_64_ms_abi_int_parameter_registers;
4529 parm_regs = x86_64_int_parameter_registers;
4530 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4531 : X86_64_REGPARM_MAX); i++)
4532 if (regno == parm_regs[i])
4537 /* Return if we do not know how to pass TYPE solely in registers. */
4540 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* Defer to the generic must-pass-in-stack test first; the elided line
   presumably returns true when it fires — TODO confirm.  */
4542 if (must_pass_in_stack_var_size_or_pad (mode, type))
4545 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4546 The layout_type routine is crafty and tries to trick us into passing
4547 currently unsupported vector types on the stack by using TImode. */
4548 return (!TARGET_64BIT && mode == TImode
4549 && type && TREE_CODE (type) != VECTOR_TYPE);
4552 /* It returns the size, in bytes, of the area reserved for arguments passed
4553 in registers for the function represented by fndecl dependent to the used
/* SYSV is assumed when FNDECL gives no ABI information; for MS_ABI the
   (elided) return presumably yields the 32-byte shadow space — TODO
   confirm, the return values themselves are not visible here.  */
4556 ix86_reg_parm_stack_space (const_tree fndecl)
4558 int call_abi = SYSV_ABI;
4559 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4560 call_abi = ix86_function_abi (fndecl);
4562 call_abi = ix86_function_type_abi (fndecl);
4563 if (call_abi == MS_ABI)
4568 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* A type attribute can flip the ABI away from the default: "ms_abi"
   on a SysV-default target, "sysv_abi" on an MS-default target.  */
4571 ix86_function_type_abi (const_tree fntype)
4573 if (TARGET_64BIT && fntype != NULL)
4576 if (DEFAULT_ABI == SYSV_ABI)
4577 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4579 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
/* Return the calling ABI (SYSV_ABI or MS_ABI) for function decl FNDECL,
   derived from its type.  NOTE(review): a guard for NULL fndecl appears
   to be elided between the header and the return.  */
4587 ix86_function_abi (const_tree fndecl)
4591 return ix86_function_type_abi (TREE_TYPE (fndecl));
4594 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Outside a 64-bit function body there is no per-function ABI; the
   elided branch presumably returns DEFAULT_ABI — TODO confirm.  */
4597 ix86_cfun_abi (void)
4599 if (! cfun || ! TARGET_64BIT)
4601 return cfun->machine->call_abi;
4605 extern void init_regs (void);
4607 /* Implementation of call abi switching target hook. Specific to FNDECL
4608 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4609 for more details. */
/* Record the ABI of the function being compiled in cfun->machine so
   later register-set decisions (see ix86_maybe_switch_abi) can use it.  */
4611 ix86_call_abi_override (const_tree fndecl)
4613 if (fndecl == NULL_TREE)
4614 cfun->machine->call_abi = DEFAULT_ABI;
4616 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4619 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4620 re-initialization of init_regs each time we switch function context since
4621 this is needed only during RTL expansion. */
4623 ix86_maybe_switch_abi (void)
/* RSI is call-used under SysV but call-saved under the MS ABI, so its
   current call_used_regs entry reveals which register set is loaded;
   re-run init_regs only when it disagrees with the current function's
   ABI (the elided guard/call follow this test).  */
4626 call_used_regs[4 /*RSI*/] == (cfun->machine->call_abi == MS_ABI))
4630 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4631 for a call to a function whose data type is FNTYPE.
4632 For a library call, FNTYPE is 0. */
/* Zeroes *CUM, then fills in the call ABI, the integer/SSE/MMX register
   budgets and the maybe_vaarg flag.  NOTE(review): this excerpt elides
   many lines (TARGET_64BIT guards, braces, else branches), so the
   nesting of the assignments below is not fully visible.  */
4635 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4636 tree fntype, /* tree ptr for function decl */
4637 rtx libname, /* SYMBOL_REF of library name or 0 */
4640 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4641 memset (cum, 0, sizeof (*cum));
4644 cum->call_abi = ix86_function_abi (fndecl);
4646 cum->call_abi = ix86_function_type_abi (fntype);
4647 /* Set up the number of registers to use for passing arguments. */
4649 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4650 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it")
4651 cum->nregs = ix86_regparm;
/* Cross-ABI calls get the other ABI's register counts.  */
4654 if (cum->call_abi != DEFAULT_ABI)
4655 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4660 cum->sse_nregs = SSE_REGPARM_MAX;
4663 if (cum->call_abi != DEFAULT_ABI)
4664 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4665 : X64_SSE_REGPARM_MAX;
4669 cum->mmx_nregs = MMX_REGPARM_MAX;
4670 cum->warn_avx = true;
4671 cum->warn_sse = true;
4672 cum->warn_mmx = true;
4674 /* Because type might mismatch in between caller and callee, we need to
4675 use actual type of function for local calls.
4676 FIXME: cgraph_analyze can be told to actually record if function uses
4677 va_start so for local functions maybe_vaarg can be made aggressive
4679 FIXME: once typesytem is fixed, we won't need this code anymore. */
4681 fntype = TREE_TYPE (fndecl);
4682 cum->maybe_vaarg = (fntype
4683 ? (!prototype_p (fntype) || stdarg_p (fntype))
4688 /* If there are variable arguments, then we won't pass anything
4689 in registers in 32-bit mode. */
4690 if (stdarg_p (fntype))
4701 /* Use ecx and edx registers if function has fastcall attribute,
4702 else look for regparm information. */
4705 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4711 cum->nregs = ix86_function_regparm (fntype, fndecl);
4714 /* Set up the number of SSE registers used for passing SFmode
4715 and DFmode arguments. Warn for mismatching ABI. */
4716 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4720 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4721 But in the case of vector types, it is some vector mode.
4723 When we have only some of our vector isa extensions enabled, then there
4724 are some modes for which vector_mode_supported_p is false. For these
4725 modes, the generic vector support in gcc will choose some non-vector mode
4726 in order to implement the type. By computing the natural mode, we'll
4727 select the proper ABI location for the operand and not depend on whatever
4728 the middle-end decides to do with these vector types. */
4730 static enum machine_mode
4731 type_natural_mode (const_tree type)
4733 enum machine_mode mode = TYPE_MODE (type);
/* Only reconstruct a vector mode for 8/16-byte vector types the
   middle-end lowered to a non-vector mode.  */
4735 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4737 HOST_WIDE_INT size = int_size_in_bytes (type);
4738 if ((size == 8 || size == 16)
4739 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4740 && TYPE_VECTOR_SUBPARTS (type) > 1)
4742 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4744 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4745 mode = MIN_MODE_VECTOR_FLOAT;
4747 mode = MIN_MODE_VECTOR_INT;
4749 /* Get the mode which has this inner mode and number of units. */
/* Linear scan over the wider-mode chain to find the first vector
   mode matching both inner mode and element count.  */
4750 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4751 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4752 && GET_MODE_INNER (mode) == innermode)
4762 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4763 this may not agree with the mode that the type system has chosen for the
4764 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4765 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4768 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4773 if (orig_mode != BLKmode)
4774 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   at offset 0 so callers can still treat it as a register location.  */
4777 tmp = gen_rtx_REG (mode, regno);
4778 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4779 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4785 /* x86-64 register passing implementation. See x86-64 PS ABI for details. Goal
4786 of this code is to classify each 8bytes of incoming argument by the register
4787 class and assign registers accordingly. */
4789 /* Return the union class of CLASS1 and CLASS2.
4790 See the x86-64 PS ABI for details. */
/* Direct transcription of the psABI's class-merge rules, applied in
   order; the first matching rule decides.  */
4792 static enum x86_64_reg_class
4793 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4795 /* Rule #1: If both classes are equal, this is the resulting class. */
4796 if (class1 == class2)
4799 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4801 if (class1 == X86_64_NO_CLASS)
4803 if (class2 == X86_64_NO_CLASS)
4806 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4807 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4808 return X86_64_MEMORY_CLASS;
4810 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both halves fit in 32 bits).  */
4811 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4812 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4813 return X86_64_INTEGERSI_CLASS;
4814 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4815 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4816 return X86_64_INTEGER_CLASS;
4818 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4820 if (class1 == X86_64_X87_CLASS
4821 || class1 == X86_64_X87UP_CLASS
4822 || class1 == X86_64_COMPLEX_X87_CLASS
4823 || class2 == X86_64_X87_CLASS
4824 || class2 == X86_64_X87UP_CLASS
4825 || class2 == X86_64_COMPLEX_X87_CLASS)
4826 return X86_64_MEMORY_CLASS;
4828 /* Rule #6: Otherwise class SSE is used. */
4829 return X86_64_SSE_CLASS;
4832 /* Classify the argument of type TYPE and mode MODE.
4833 CLASSES will be filled by the register class used to pass each word
4834 of the operand. The number of words is returned. In case the parameter
4835 should be passed in memory, 0 is returned. As a special case for zero
4836 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4838 BIT_OFFSET is used internally for handling records and specifies offset
4839 of the offset in bits modulo 256 to avoid overflow cases.
4841 See the x86-64 PS ABI for details.
/* Two phases: aggregates are classified field-by-field and merged via
   merge_classes; scalars fall through to a switch on MODE at the end.
   NOTE(review): this excerpt elides many lines (the return type, most
   `return 0` memory-class exits, several case labels in the final
   switch) — comments describe only the visible code.  */
4845 classify_argument (enum machine_mode mode, const_tree type,
4846 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4848 HOST_WIDE_INT bytes =
4849 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 8-byte "eightbytes" the argument occupies, accounting for
   the sub-word starting offset.  */
4850 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4852 /* Variable sized entities are always passed/returned in memory. */
4856 if (mode != VOIDmode
4857 && targetm.calls.must_pass_in_stack (mode, type))
4860 if (type && AGGREGATE_TYPE_P (type))
4864 enum x86_64_reg_class subclasses[MAX_CLASSES];
4866 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4870 for (i = 0; i < words; i++)
4871 classes[i] = X86_64_NO_CLASS;
4873 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4874 signalize memory class, so handle it as special case. */
4877 classes[0] = X86_64_NO_CLASS;
4881 /* Classify each field of record and merge classes. */
4882 switch (TREE_CODE (type))
4885 /* And now merge the fields of structure. */
4886 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4888 if (TREE_CODE (field) == FIELD_DECL)
4892 if (TREE_TYPE (field) == error_mark_node)
4895 /* Bitfields are always classified as integer. Handle them
4896 early, since later code would consider them to be
4897 misaligned integers. */
4898 if (DECL_BIT_FIELD (field))
/* Mark every eightbyte the bitfield overlaps as INTEGER.  */
4900 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4901 i < ((int_bit_position (field) + (bit_offset % 64))
4902 + tree_low_cst (DECL_SIZE (field), 0)
4905 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield: classify the field recursively at its bit position
   and merge each resulting subclass into the containing eightbyte.  */
4910 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4911 TREE_TYPE (field), subclasses,
4912 (int_bit_position (field)
4913 + bit_offset) % 256);
4916 for (i = 0; i < num; i++)
4919 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4921 merge_classes (subclasses[i], classes[i + pos]);
4929 /* Arrays are handled as small records. */
4932 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4933 TREE_TYPE (type), subclasses, bit_offset);
4937 /* The partial classes are now full classes. */
4938 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4939 subclasses[0] = X86_64_SSE_CLASS;
4940 if (subclasses[0] == X86_64_INTEGERSI_CLASS
4941 && !((bit_offset % 64) == 0 && bytes == 4))
4942 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element's classification across all words of the
   array.  */
4944 for (i = 0; i < words; i++)
4945 classes[i] = subclasses[i % num];
4950 case QUAL_UNION_TYPE:
4951 /* Unions are similar to RECORD_TYPE but offset is always 0.
4953 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4955 if (TREE_CODE (field) == FIELD_DECL)
4959 if (TREE_TYPE (field) == error_mark_node)
4962 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4963 TREE_TYPE (field), subclasses,
4967 for (i = 0; i < num; i++)
4968 classes[i] = merge_classes (subclasses[i], classes[i]);
4977 /* Final merger cleanup. */
4978 for (i = 0; i < words; i++)
4980 /* If one class is MEMORY, everything should be passed in
4982 if (classes[i] == X86_64_MEMORY_CLASS)
4985 /* The X86_64_SSEUP_CLASS should be always preceded by
4986 X86_64_SSE_CLASS. */
4987 if (classes[i] == X86_64_SSEUP_CLASS
4988 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4989 classes[i] = X86_64_SSE_CLASS;
4991 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4992 if (classes[i] == X86_64_X87UP_CLASS
4993 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4994 classes[i] = X86_64_SSE_CLASS;
4999 /* Compute alignment needed. We align all types to natural boundaries with
5000 exception of XFmode that is aligned to 64bits. */
5001 if (mode != VOIDmode && mode != BLKmode)
5003 int mode_alignment = GET_MODE_BITSIZE (mode);
5006 mode_alignment = 128;
5007 else if (mode == XCmode)
5008 mode_alignment = 256;
5009 if (COMPLEX_MODE_P (mode))
5010 mode_alignment /= 2;
5011 /* Misaligned fields are always returned in memory. */
5012 if (bit_offset % mode_alignment)
5016 /* for V1xx modes, just use the base mode */
5017 if (VECTOR_MODE_P (mode) && mode != V1DImode
5018 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5019 mode = GET_MODE_INNER (mode);
5021 /* Classification of atomic types. */
/* Scalar/vector switch: each case writes classes[0..1] directly;
   the surrounding case labels are largely elided in this excerpt.  */
5026 classes[0] = X86_64_SSE_CLASS;
5029 classes[0] = X86_64_SSE_CLASS;
5030 classes[1] = X86_64_SSEUP_CLASS;
/* Integer modes: classify by how far past the 64-bit boundary the
   value (including its starting offset) extends.  */
5040 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5044 classes[0] = X86_64_INTEGERSI_CLASS;
5047 else if (size <= 64)
5049 classes[0] = X86_64_INTEGER_CLASS;
5052 else if (size <= 64+32)
5054 classes[0] = X86_64_INTEGER_CLASS;
5055 classes[1] = X86_64_INTEGERSI_CLASS;
5058 else if (size <= 64+64)
5060 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5068 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5075 if (!(bit_offset % 64))
5076 classes[0] = X86_64_SSESF_CLASS;
5078 classes[0] = X86_64_SSE_CLASS;
5081 classes[0] = X86_64_SSEDF_CLASS;
5084 classes[0] = X86_64_X87_CLASS;
5085 classes[1] = X86_64_X87UP_CLASS;
5088 classes[0] = X86_64_SSE_CLASS;
5089 classes[1] = X86_64_SSEUP_CLASS;
5092 classes[0] = X86_64_SSE_CLASS;
5095 classes[0] = X86_64_SSEDF_CLASS;
5096 classes[1] = X86_64_SSEDF_CLASS;
5099 classes[0] = X86_64_COMPLEX_X87_CLASS;
5102 /* This modes is larger than 16 bytes. */
5110 classes[0] = X86_64_AVX_CLASS;
5118 classes[0] = X86_64_SSE_CLASS;
5119 classes[1] = X86_64_SSEUP_CLASS;
5126 classes[0] = X86_64_SSE_CLASS;
5132 gcc_assert (VECTOR_MODE_P (mode));
5137 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5139 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5140 classes[0] = X86_64_INTEGERSI_CLASS;
5142 classes[0] = X86_64_INTEGER_CLASS;
5143 classes[1] = X86_64_INTEGER_CLASS;
/* One word, or two when the value spans more than 8 bytes.  */
5144 return 1 + (bytes > 8);
5148 /* Examine the argument and return set number of register required in each
5149 class. Return 0 iff parameter should be passed in memory. */
/* Tallies classify_argument's per-word classes into *INT_NREGS and
   *SSE_NREGS.  NOTE(review): the increment statements and some returns
   inside the switch are elided in this excerpt.  */
5151 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5152 int *int_nregs, int *sse_nregs)
5154 enum x86_64_reg_class regclass[MAX_CLASSES];
5155 int n = classify_argument (mode, type, regclass, 0);
5161 for (n--; n >= 0; n--)
5162 switch (regclass[n])
5164 case X86_64_INTEGER_CLASS:
5165 case X86_64_INTEGERSI_CLASS:
5168 case X86_64_AVX_CLASS:
5169 case X86_64_SSE_CLASS:
5170 case X86_64_SSESF_CLASS:
5171 case X86_64_SSEDF_CLASS:
5174 case X86_64_NO_CLASS:
5175 case X86_64_SSEUP_CLASS:
5177 case X86_64_X87_CLASS:
5178 case X86_64_X87UP_CLASS:
5182 case X86_64_COMPLEX_X87_CLASS:
/* x87 classes are only usable for return values; as arguments they
   force memory passing (return 0).  */
5183 return in_return ? 2 : 0;
5184 case X86_64_MEMORY_CLASS:
5190 /* Construct container for the argument used by GCC interface. See
5191 FUNCTION_ARG for the detailed description. */
/* Builds the rtx describing where an argument/return value lives: a
   single REG for simple cases, or a PARALLEL of (REG, offset) pairs
   when the value is split across registers.  Returns NULL (elided
   returns) when the value must go in memory.  NOTE(review): this
   excerpt elides lines (braces, some returns, `tmpmode` assignments
   around lines 5306-5310 and 5333-5337).  */
5194 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5195 const_tree type, int in_return, int nintregs, int nsseregs,
5196 const int *intreg, int sse_regno)
5198 /* The following variables hold the static issued_error state. */
5199 static bool issued_sse_arg_error;
5200 static bool issued_sse_ret_error;
5201 static bool issued_x87_ret_error;
5203 enum machine_mode tmpmode;
5205 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5206 enum x86_64_reg_class regclass[MAX_CLASSES];
5210 int needed_sseregs, needed_intregs;
5211 rtx exp[MAX_CLASSES];
5214 n = classify_argument (mode, type, regclass, 0);
5217 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required classes: pass in memory.  */
5220 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5223 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5224 some less clueful developer tries to use floating-point anyway. */
5225 if (needed_sseregs && !TARGET_SSE)
5229 if (!issued_sse_ret_error)
5231 error ("SSE register return with SSE disabled");
5232 issued_sse_ret_error = true;
5235 else if (!issued_sse_arg_error)
5237 error ("SSE register argument with SSE disabled");
5238 issued_sse_arg_error = true;
5243 /* Likewise, error if the ABI requires us to return values in the
5244 x87 registers and the user specified -mno-80387. */
5245 if (!TARGET_80387 && in_return)
5246 for (i = 0; i < n; i++)
5247 if (regclass[i] == X86_64_X87_CLASS
5248 || regclass[i] == X86_64_X87UP_CLASS
5249 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5251 if (!issued_x87_ret_error)
5253 error ("x87 register return with x87 disabled");
5254 issued_x87_ret_error = true;
5259 /* First construct simple cases. Avoid SCmode, since we want to use
5260 single register to pass this type. */
5261 if (n == 1 && mode != SCmode)
5262 switch (regclass[0])
5264 case X86_64_INTEGER_CLASS:
5265 case X86_64_INTEGERSI_CLASS:
5266 return gen_rtx_REG (mode, intreg[0]);
5267 case X86_64_AVX_CLASS:
5268 case X86_64_SSE_CLASS:
5269 case X86_64_SSESF_CLASS:
5270 case X86_64_SSEDF_CLASS:
5271 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5272 case X86_64_X87_CLASS:
5273 case X86_64_COMPLEX_X87_CLASS:
5274 return gen_rtx_REG (mode, FIRST_STACK_REG);
5275 case X86_64_NO_CLASS:
5276 /* Zero sized array, struct or class. */
/* Other common two-word shapes that still fit one hard register
   (full SSE register, x87 long double, adjacent integer pair).  */
5281 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5282 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5283 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5286 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5287 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5288 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5289 && regclass[1] == X86_64_INTEGER_CLASS
5290 && (mode == CDImode || mode == TImode || mode == TFmode)
5291 && intreg[0] + 1 == intreg[1])
5292 return gen_rtx_REG (mode, intreg[0]);
5294 /* Otherwise figure out the entries of the PARALLEL. */
5295 for (i = 0; i < n; i++)
5297 switch (regclass[i])
5299 case X86_64_NO_CLASS:
5301 case X86_64_INTEGER_CLASS:
5302 case X86_64_INTEGERSI_CLASS:
5303 /* Merge TImodes on aligned occasions here too. */
5304 if (i * 8 + 8 > bytes)
5305 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5306 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5310 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5311 if (tmpmode == BLKmode)
5313 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5314 gen_rtx_REG (tmpmode, *intreg),
5318 case X86_64_SSESF_CLASS:
5319 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5320 gen_rtx_REG (SFmode,
5321 SSE_REGNO (sse_regno)),
5325 case X86_64_SSEDF_CLASS:
5326 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5327 gen_rtx_REG (DFmode,
5328 SSE_REGNO (sse_regno)),
5332 case X86_64_SSE_CLASS:
/* An SSEUP word following SSE means the pair forms one 16-byte
   (TImode) register slice.  */
5333 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5337 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5338 gen_rtx_REG (tmpmode,
5339 SSE_REGNO (sse_regno)),
5341 if (tmpmode == TImode)
5350 /* Empty aligned struct, union or class. */
5354 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5355 for (i = 0; i < nexps; i++)
5356 XVECEXP (ret, 0, i) = exp [i];
5360 /* Update the data in CUM to advance over an argument of mode MODE
5361 and data type TYPE. (TYPE is null for libcalls where that information
5362 may not be available.) */
/* Advance CUM past a 32-bit (ia32 ABI) argument of MODE/TYPE occupying
   BYTES bytes / WORDS words.  Depending on the mode class the argument
   consumes integer, SSE or MMX register slots.
   NOTE(review): the switch on MODE that selects between these branches
   is elided from this view — confirm against the full source.  */
5365 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5366 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-class arguments: one register per word.  */
5382 cum->words += words;
5383 cum->nregs -= words;
5384 cum->regno += words;
/* Ran out of integer registers; remaining args presumably go on the
   stack (handler body elided).  */
5386 if (cum->nregs <= 0)
/* float_in_sse gates whether SFmode/DFmode scalars use SSE registers.  */
5394 if (cum->float_in_sse < 2)
5397 if (cum->float_in_sse < 1)
/* SSE-class vector arguments (aggregates excluded) consume one SSE reg.  */
5415 if (!type || !AGGREGATE_TYPE_P (type))
5417 cum->sse_words += words;
5418 cum->sse_nregs -= 1;
5419 cum->sse_regno += 1;
5420 if (cum->sse_nregs <= 0)
/* MMX-class vector arguments likewise consume one MMX register.  */
5433 if (!type || !AGGREGATE_TYPE_P (type))
5435 cum->mmx_words += words;
5436 cum->mmx_nregs -= 1;
5437 cum->mmx_regno += 1;
5438 if (cum->mmx_nregs <= 0)
/* Advance CUM past a 64-bit SysV ABI argument.  examine_argument
   classifies the argument into integer/SSE register needs; if enough
   registers remain they are consumed, otherwise only the stack word
   counter advances.  */
5449 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5450 tree type, HOST_WIDE_INT words, int named)
5452 int int_nregs, sse_nregs;
5454 /* Unnamed 256bit vector mode parameters are passed on stack. */
5455 if (!named && VALID_AVX256_REG_MODE (mode))
/* Not register-passable at all: it lives in memory.  */
5458 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5459 cum->words += words;
/* Fits in the remaining registers: consume them.  */
5460 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5462 cum->nregs -= int_nregs;
5463 cum->sse_nregs -= sse_nregs;
5464 cum->regno += int_nregs;
5465 cum->sse_regno += sse_nregs;
/* Otherwise (else branch elided here) the argument goes on the stack.  */
5468 cum->words += words;
/* Advance CUM past a Win64 (MS ABI) argument of BYTES bytes / WORDS
   words.  Anything not of size 1, 2, 4 or 8 must already have been
   converted to pass-by-reference.  */
5472 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5473 HOST_WIDE_INT words)
5475 /* Otherwise, this should be passed indirect. */
5476 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
/* NOTE(review): the register bookkeeping that follows is elided here.  */
5478 cum->words += words;
/* Top-level argument-advance hook: compute size in bytes/words, then
   dispatch to the MS-64, SysV-64 or 32-bit worker based on the target
   and the cumulative-args ABI.  */
5487 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5488 tree type, int named)
5490 HOST_WIDE_INT bytes, words;
/* BLKmode has no meaningful GET_MODE_SIZE; use the type's size.  */
5492 if (mode == BLKmode)
5493 bytes = int_size_in_bytes (type);
5495 bytes = GET_MODE_SIZE (mode);
5496 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Re-derive the natural (possibly vector) mode from the type.  */
5499 mode = type_natural_mode (type);
5501 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5502 function_arg_advance_ms_64 (cum, bytes, words);
5503 else if (TARGET_64BIT)
5504 function_arg_advance_64 (cum, mode, type, words, named);
5506 function_arg_advance_32 (cum, mode, type, bytes, words);
5509 /* Define where to put the arguments to a function.
5510 Value is zero to push the argument on the stack,
5511 or a hard register in which to store the argument.
5513 MODE is the argument's machine mode.
5514 TYPE is the data type of the argument (as a tree).
5515 This is null for libcalls where that information may
5517 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5518 the preceding args and about the function being called.
5519 NAMED is nonzero if this argument is a named parameter
5520 (otherwise it is an extra parameter matching an ellipsis). */
/* Pick the register (or NULL => stack) for a 32-bit ABI argument.
   Handles regparm integer args, fastcall's ECX/EDX allocation, and
   SSE/AVX/MMX vector args, warning once per class when the relevant
   ISA is disabled.  NOTE(review): the mode switch framing these cases
   is elided from this view.  */
5523 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5524 enum machine_mode orig_mode, tree type,
5525 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Each warning below is issued at most once per compilation.  */
5527 static bool warnedavx, warnedsse, warnedmmx;
5529 /* Avoid the AL settings for the Unix64 ABI. */
5530 if (mode == VOIDmode)
/* Integer argument fits in the remaining regparm registers.  */
5546 if (words <= cum->nregs)
5548 int regno = cum->regno;
5550 /* Fastcall allocates the first two DWORD (SImode) or
5551 smaller arguments to ECX and EDX if it isn't an
5557 || (type && AGGREGATE_TYPE_P (type)))
5560 /* ECX not EAX is the first allocated register. */
5561 if (regno == AX_REG)
5564 return gen_rtx_REG (mode, regno);
5569 if (cum->float_in_sse < 2)
5572 if (cum->float_in_sse < 1)
5576 /* In 32bit, we pass TImode in xmm registers. */
5583 if (!type || !AGGREGATE_TYPE_P (type))
5585 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5588 warning (0, "SSE vector argument without SSE enabled "
5592 return gen_reg_or_parallel (mode, orig_mode,
5593 cum->sse_regno + FIRST_SSE_REG);
5598 /* In 32bit, we pass OImode in ymm registers. */
5605 if (!type || !AGGREGATE_TYPE_P (type))
5607 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5610 warning (0, "AVX vector argument without AVX enabled "
5614 return gen_reg_or_parallel (mode, orig_mode,
5615 cum->sse_regno + FIRST_SSE_REG);
5624 if (!type || !AGGREGATE_TYPE_P (type))
5626 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5629 warning (0, "MMX vector argument without MMX enabled "
5633 return gen_reg_or_parallel (mode, orig_mode,
5634 cum->mmx_regno + FIRST_MMX_REG);
/* Pick the register(s) for a 64-bit SysV ABI argument.  A VOIDmode
   "argument" is the hidden AL value carrying the number of SSE
   registers used by a varargs call.  Real arguments are classified by
   construct_container.  */
5643 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5644 enum machine_mode orig_mode, tree type, int named)
5646 static bool warnedavx;
5648 /* Handle a hidden AL argument containing number of registers
5649 for varargs x86-64 functions. */
5650 if (mode == VOIDmode)
5651 return GEN_INT (cum->maybe_vaarg
5652 ? (cum->sse_nregs < 0
5653 ? (cum->call_abi == DEFAULT_ABI
5655 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5656 : X64_SSE_REGPARM_MAX))
5671 /* In 64bit, we pass TImode in integer registers and OImode on
5673 if (!type || !AGGREGATE_TYPE_P (type))
5675 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5678 warning (0, "AVX vector argument without AVX enabled "
5683 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Delegate the real classification work to construct_container.  */
5689 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5691 &x86_64_int_parameter_registers [cum->regno],
/* Pick the register for a Win64 (MS ABI) argument.  Win64 passes each
   argument in a single slot: integer regs normally, XMM for SF/DFmode,
   and both (a PARALLEL) for unnamed floats so either copy can be read.  */
5696 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5697 enum machine_mode orig_mode, int named,
5698 HOST_WIDE_INT bytes)
5702 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5703 We use value of -2 to specify that current function call is MSABI. */
5704 if (mode == VOIDmode)
5705 return GEN_INT (-2);
5707 /* If we've run out of registers, it goes on the stack. */
5708 if (cum->nregs == 0)
5711 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5713 /* Only floating point modes are passed in anything but integer regs. */
5714 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5717 regno = cum->regno + FIRST_SSE_REG;
5722 /* Unnamed floating parameters are passed in both the
5723 SSE and integer registers. */
5724 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5725 t2 = gen_rtx_REG (mode, regno);
5726 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5727 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5728 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5731 /* Handle aggregated types passed in register. */
5732 if (orig_mode == BLKmode)
/* Small aggregates travel as a plain 4- or 8-byte integer.  */
5734 if (bytes > 0 && bytes <= 8)
5735 mode = (bytes > 4 ? DImode : SImode);
5736 if (mode == BLKmode)
5740 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG hook: compute the argument size, normalize
   vector types to their natural mode, then dispatch to the MS-64,
   SysV-64 or 32-bit worker.  Returns 0 (stack) or a hard register.  */
5744 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5745 tree type, int named)
5747 enum machine_mode mode = omode;
5748 HOST_WIDE_INT bytes, words;
5750 if (mode == BLKmode)
5751 bytes = int_size_in_bytes (type);
5753 bytes = GET_MODE_SIZE (mode);
5754 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5756 /* To simplify the code below, represent vector types with a vector mode
5757 even if MMX/SSE are not active. */
5758 if (type && TREE_CODE (type) == VECTOR_TYPE)
5759 mode = type_natural_mode (type);
5761 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5762 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5763 else if (TARGET_64BIT)
5764 return function_arg_64 (cum, mode, omode, type, named);
5766 return function_arg_32 (cum, mode, omode, type, bytes, words);
5769 /* A C expression that indicates when an argument must be passed by
5770 reference. If nonzero for an argument, a copy of that argument is
5771 made in memory and a pointer to the argument is passed instead of
5772 the argument itself. The pointer is passed in whatever way is
5773 appropriate for passing a pointer to that type. */
/* TARGET_PASS_BY_REFERENCE hook.  Win64: arrays and oddly-sized
   aggregates go by reference.  SysV 64-bit: variable-size types
   (int_size_in_bytes == -1) go by reference.  32-bit: never.  */
5776 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5777 enum machine_mode mode ATTRIBUTE_UNUSED,
5778 const_tree type, bool named ATTRIBUTE_UNUSED)
5780 /* See Windows x64 Software Convention. */
5781 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5783 int msize = (int) GET_MODE_SIZE (mode);
5786 /* Arrays are passed by reference. */
5787 if (TREE_CODE (type) == ARRAY_TYPE)
5790 if (AGGREGATE_TYPE_P (type))
5792 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5793 are passed by reference. */
5794 msize = int_size_in_bytes (type);
5798 /* __m128 is passed by reference. */
/* Only the four power-of-two sizes are passed by value.  */
5800 case 1: case 2: case 4: case 8:
5806 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5812 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Return true if TYPE (or, recursively, any of its fields/elements)
   carries 128-bit alignment that 32-bit argument passing must honor.  */
5815 contains_aligned_value_p (tree type)
5817 enum machine_mode mode = TYPE_MODE (type);
5818 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5822 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5824 if (TYPE_ALIGN (type) < 128)
5827 if (AGGREGATE_TYPE_P (type))
5829 /* Walk the aggregates recursively. */
5830 switch (TREE_CODE (type))
5834 case QUAL_UNION_TYPE:
5838 /* Walk all the structure fields. */
5839 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5841 if (TREE_CODE (field) == FIELD_DECL
5842 && contains_aligned_value_p (TREE_TYPE (field)))
5849 /* Just for use if some languages pass arrays by value. */
5850 if (contains_aligned_value_p (TREE_TYPE (type)))
5861 /* Gives the alignment boundary, in bits, of an argument with the
5862 specified mode and type. */
/* Return the alignment boundary, in bits, for an argument of MODE/TYPE.
   Result is clamped to [PARM_BOUNDARY, BIGGEST_ALIGNMENT]; on 32-bit
   targets almost everything is demoted to PARM_BOUNDARY (4 bytes).  */
5865 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5870 /* Since canonical type is used for call, we convert it to
5871 canonical type if needed. */
5872 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5873 type = TYPE_CANONICAL (type);
5874 align = TYPE_ALIGN (type);
/* No type: fall back to the mode's natural alignment.  */
5877 align = GET_MODE_ALIGNMENT (mode);
5878 if (align < PARM_BOUNDARY)
5879 align = PARM_BOUNDARY;
5880 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5881 natural boundaries. */
5882 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5884 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5885 make an exception for SSE modes since these require 128bit
5888 The handling here differs from field_alignment. ICC aligns MMX
5889 arguments to 4 byte boundaries, while structure fields are aligned
5890 to 8 byte boundaries. */
5893 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5894 align = PARM_BOUNDARY;
5898 if (!contains_aligned_value_p (type))
5899 align = PARM_BOUNDARY;
5902 if (align > BIGGEST_ALIGNMENT)
5903 align = BIGGEST_ALIGNMENT;
5907 /* Return true if N is a possible register number of function value. */
/* Return true if REGNO may hold a function return value.
   NOTE(review): the switch framing and the other cases (AX etc.) are
   elided from this view.  */
5910 ix86_function_value_regno_p (int regno)
5917 case FIRST_FLOAT_REG:
5918 /* TODO: The function should depend on current function ABI but
5919 builtins.c would need updating then. Therefore we use the
5921 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5923 return TARGET_FLOAT_RETURNS_IN_80387;
/* SSE/MMX return registers: valid on Darwin and 64-bit targets.  */
5929 if (TARGET_MACHO || TARGET_64BIT)
5937 /* Define how to find the value returned by a function.
5938 VALTYPE is the data type of the value (as a tree).
5939 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5940 otherwise, FUNC is 0. */
/* Choose the return register for a 32-bit ABI value: %mm0 for 8-byte
   vectors, %xmm0 for TImode/16-byte vectors, %st(0) for x87 floats,
   %eax otherwise — with an SSE override for sseregparm functions.  */
5943 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5944 const_tree fntype, const_tree fn)
5948 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5949 we normally prevent this case when mmx is not available. However
5950 some ABIs may require the result to be returned like DImode. */
5951 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5952 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5954 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5955 we prevent this case when sse is not available. However some ABIs
5956 may require the result to be returned like integer TImode. */
5957 else if (mode == TImode
5958 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5959 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5961 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
5962 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5963 regno = FIRST_FLOAT_REG;
5965 /* Most things go in %eax. */
5968 /* Override FP return register with %xmm0 for local functions when
5969 SSE math is enabled or for functions with sseregparm attribute. */
5970 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5972 int sse_level = ix86_function_sseregparm (fntype, fn, false);
/* sseregparm level 1 covers SFmode only; level 2 adds DFmode.  */
5973 if ((sse_level >= 1 && mode == SFmode)
5974 || (sse_level == 2 && mode == DFmode))
5975 regno = FIRST_SSE_REG;
5978 return gen_rtx_REG (orig_mode, regno);
/* Choose the return location for a 64-bit SysV ABI value, delegating
   classification to construct_container; libcalls (no type node) are
   handled directly by mode.  */
5982 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5987 /* Handle libcalls, which don't provide a type node. */
5988 if (valtype == NULL)
6000 return gen_rtx_REG (mode, FIRST_SSE_REG);
6003 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6007 return gen_rtx_REG (mode, AX_REG);
6011 ret = construct_container (mode, orig_mode, valtype, 1,
6012 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6013 x86_64_int_return_registers, 0);
6015 /* For zero sized structures, construct_container returns NULL, but we
6016 need to keep rest of compiler happy by returning meaningful value. */
6018 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Choose the return register under the Win64 ABI: %xmm0 for 16-byte
   non-complex scalar/vector modes and for SF/DFmode, %rax otherwise.  */
6024 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6026 unsigned int regno = AX_REG;
6030 switch (GET_MODE_SIZE (mode))
/* 16-byte case (label elided): __m128-style values in %xmm0.  */
6033 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6034 && !COMPLEX_MODE_P (mode))
6035 regno = FIRST_SSE_REG;
6039 if (mode == SFmode || mode == DFmode)
6040 regno = FIRST_SSE_REG;
6046 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   resolve fn/fntype from FNTYPE_OR_DECL and dispatch per ABI.  */
6050 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6051 enum machine_mode orig_mode, enum machine_mode mode)
6053 const_tree fn, fntype;
6056 if (fntype_or_decl && DECL_P (fntype_or_decl))
6057 fn = fntype_or_decl;
6058 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6060 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6061 return function_value_ms_64 (orig_mode, mode);
6062 else if (TARGET_64BIT)
6063 return function_value_64 (orig_mode, mode, valtype);
6065 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalize the type's mode and delegate.  */
6069 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6070 bool outgoing ATTRIBUTE_UNUSED)
6072 enum machine_mode mode, orig_mode;
6074 orig_mode = TYPE_MODE (valtype);
6075 mode = type_natural_mode (valtype);
6076 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* TARGET_LIBCALL_VALUE equivalent: libcalls have no type node, so the
   mode serves as both original and natural mode.  */
6080 ix86_libcall_value (enum machine_mode mode)
6082 return ix86_function_value_1 (NULL, NULL, mode, mode);
6085 /* Return true iff type is returned in memory. */
6087 static int ATTRIBUTE_UNUSED
/* Nonzero if a 32-bit ABI value of TYPE/MODE is returned in memory.  */
6088 return_in_memory_32 (const_tree type, enum machine_mode mode)
6092 if (mode == BLKmode)
6095 size = int_size_in_bytes (type);
/* MS-compatible targets return small aggregates in registers.  */
6097 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6100 if (VECTOR_MODE_P (mode) || mode == TImode)
6102 /* User-created vectors small enough to fit in EAX. */
6106 /* MMX/3dNow values are returned in MM0,
6107 except when it doesn't exist. */
6109 return (TARGET_MMX ? 0 : 1);
6111 /* SSE values are returned in XMM0, except when it doesn't exist. */
6113 return (TARGET_SSE ? 0 : 1);
6124 static int ATTRIBUTE_UNUSED
/* Nonzero if a SysV 64-bit value of TYPE/MODE is returned in memory:
   i.e. the classifier cannot place it in registers at all.  */
6125 return_in_memory_64 (const_tree type, enum machine_mode mode)
6127 int needed_intregs, needed_sseregs;
6128 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6131 static int ATTRIBUTE_UNUSED
/* Nonzero if a Win64 value of TYPE/MODE is returned in memory: only
   __m128-style 16-byte values and sizes 1/2/4/8 return in registers.  */
6132 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6134 HOST_WIDE_INT size = int_size_in_bytes (type);
6136 /* __m128 is returned in xmm0. */
6137 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6138 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6141 /* Otherwise, the size must be exactly in [1248]. */
6142 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: dispatch to the per-ABI worker, unless
   the subtarget overrides the whole decision.  */
6146 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6148 #ifdef SUBTARGET_RETURN_IN_MEMORY
6149 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6151 const enum machine_mode mode = type_natural_mode (type);
6153 if (TARGET_64BIT_MS_ABI)
6154 return return_in_memory_ms_64 (type, mode);
6155 else if (TARGET_64BIT)
6156 return return_in_memory_64 (type, mode);
6158 return return_in_memory_32 (type, mode);
6162 /* Return false iff TYPE is returned in memory. This version is used
6163 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6164 but differs notably in that when MMX is available, 8-byte vectors
6165 are returned in memory, rather than in MMX registers. */
/* Solaris 10 variant of the return-in-memory predicate; differs from
   the generic one for 8-byte vectors when MMX is available (see the
   comment above in the original source).  */
6168 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6171 enum machine_mode mode = type_natural_mode (type);
6174 return return_in_memory_64 (type, mode);
6176 if (mode == BLKmode)
6179 size = int_size_in_bytes (type);
6181 if (VECTOR_MODE_P (mode))
6183 /* Return in memory only if MMX registers *are* available. This
6184 seems backwards, but it is consistent with the existing
/* TImode and XFmode values also have dedicated handling (elided).  */
6191 else if (mode == TImode)
6193 else if (mode == XFmode)
6199 /* When returning SSE vector types, we have a choice of either
6200 (1) being abi incompatible with a -march switch, or
6201 (2) generating an error.
6202 Given no good solution, I think the safest thing is one warning.
6203 The user won't be able to use -Werror, but....
6205 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6206 called in response to actually generating a caller or callee that
6207 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6208 via aggregate_value_p for general type probing from tree-ssa. */
/* STRUCT_VALUE_RTX hook; used here mainly to issue a one-time warning
   when a function returns an SSE/MMX vector while the corresponding
   ISA is disabled (see the rationale comment above).  */
6211 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6213 static bool warnedsse, warnedmmx;
6215 if (!TARGET_64BIT && type)
6217 /* Look at the return type of the function, not the function type. */
6218 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6220 if (!TARGET_SSE && !warnedsse)
6223 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6226 warning (0, "SSE vector return without SSE enabled "
6231 if (!TARGET_MMX && !warnedmmx)
6233 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6236 warning (0, "MMX vector return without MMX enabled "
6246 /* Create the va_list data type. */
6248 /* Returns the calling convention specific va_list data type.
6249 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* Build the va_list type for ABI: a plain char* for 32-bit and MS
   targets, and for SysV 64-bit the four-field record
   { gp_offset, fp_offset, overflow_arg_area, reg_save_area }
   wrapped in a one-element array type.  */
6252 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6254 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6256 /* For i386 we use plain pointer to argument area. */
6257 if (!TARGET_64BIT || abi == MS_ABI)
6258 return build_pointer_type (char_type_node);
6260 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6261 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6263 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6264 unsigned_type_node);
6265 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6266 unsigned_type_node);
6267 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6269 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Expose the counter fields so the middle end can track va_list use.  */
6272 va_list_gpr_counter_field = f_gpr;
6273 va_list_fpr_counter_field = f_fpr;
6275 DECL_FIELD_CONTEXT (f_gpr) = record;
6276 DECL_FIELD_CONTEXT (f_fpr) = record;
6277 DECL_FIELD_CONTEXT (f_ovf) = record;
6278 DECL_FIELD_CONTEXT (f_sav) = record;
6280 TREE_CHAIN (record) = type_decl;
6281 TYPE_NAME (record) = type_decl;
6282 TYPE_FIELDS (record) = f_gpr;
6283 TREE_CHAIN (f_gpr) = f_fpr;
6284 TREE_CHAIN (f_fpr) = f_ovf;
6285 TREE_CHAIN (f_ovf) = f_sav;
6287 layout_type (record);
6289 /* The correct type is an array type of one element. */
6290 return build_array_type (record, build_index_type (size_zero_node));
6293 /* Setup the builtin va_list data type and for 64-bit the additional
6294 calling convention specific va_list data types. */
/* TARGET_BUILD_BUILTIN_VA_LIST hook: build the default va_list, then
   also the "other" ABI's va_list (ms_va_list / sysv_va_list) so both
   calling conventions can coexist in one translation unit.  */
6297 ix86_build_builtin_va_list (void)
6299 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6301 /* Initialize abi specific va_list builtin types. */
6305 if (DEFAULT_ABI == MS_ABI)
6307 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
/* Copy the type so it gets a distinct identity from va_list itself.  */
6308 if (TREE_CODE (t) != RECORD_TYPE)
6309 t = build_variant_type_copy (t);
6310 sysv_va_list_type_node = t;
6315 if (TREE_CODE (t) != RECORD_TYPE)
6316 t = build_variant_type_copy (t);
6317 sysv_va_list_type_node = t;
6319 if (DEFAULT_ABI != MS_ABI)
6321 t = ix86_build_builtin_va_list_abi (MS_ABI);
6322 if (TREE_CODE (t) != RECORD_TYPE)
6323 t = build_variant_type_copy (t);
6324 ms_va_list_type_node = t;
6329 if (TREE_CODE (t) != RECORD_TYPE)
6330 t = build_variant_type_copy (t);
6331 ms_va_list_type_node = t;
6338 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* Emit the SysV 64-bit varargs prologue: spill the unused named-arg
   GPRs into the register save area, then (if any SSE registers are
   live) emit the computed-jump sse_prologue_save sequence that spills
   exactly the SSE registers the caller reported in %al.  */
6341 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6350 int regparm = ix86_regparm;
/* A function whose ABI differs from the default uses that ABI's
   register-parameter count instead of -mregparm.  */
6352 if (cum->call_abi != DEFAULT_ABI)
6353 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6355 /* GPR size of varargs save area. */
6356 if (cfun->va_list_gpr_size)
6357 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6359 ix86_varargs_gpr_size = 0;
6361 /* FPR size of varargs save area. We don't need it if we don't pass
6362 anything in SSE registers. */
6363 if (cum->sse_nregs && cfun->va_list_fpr_size)
6364 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6366 ix86_varargs_fpr_size = 0;
/* Nothing to save: no va_list is actually used.  */
6368 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6371 save_area = frame_pointer_rtx;
6372 set = get_varargs_alias_set ();
/* Spill each remaining integer parameter register into its slot.  */
6374 for (i = cum->regno;
6376 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6379 mem = gen_rtx_MEM (Pmode,
6380 plus_constant (save_area, i * UNITS_PER_WORD));
6381 MEM_NOTRAP_P (mem) = 1;
6382 set_mem_alias_set (mem, set);
6383 emit_move_insn (mem, gen_rtx_REG (Pmode,
6384 x86_64_int_parameter_registers[i]));
6387 if (ix86_varargs_fpr_size)
6389 /* Now emit code to save SSE registers. The AX parameter contains number
6390 of SSE parameter registers used to call this function. We use
6391 sse_prologue_save insn template that produces computed jump across
6392 SSE saves. We need some preparation work to get this working. */
6394 label = gen_label_rtx ();
6395 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6397 /* Compute address to jump to :
6398 label - eax*4 + nnamed_sse_arguments*4 Or
6399 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6400 tmp_reg = gen_reg_rtx (Pmode);
6401 nsse_reg = gen_reg_rtx (Pmode);
6402 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6403 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6404 gen_rtx_MULT (Pmode, nsse_reg,
6407 /* vmovaps is one byte longer than movaps. */
6409 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6410 gen_rtx_PLUS (Pmode, tmp_reg,
6416 gen_rtx_CONST (DImode,
6417 gen_rtx_PLUS (DImode,
6419 GEN_INT (cum->sse_regno
6420 * (TARGET_AVX ? 5 : 4)))));
6422 emit_move_insn (nsse_reg, label_ref);
6423 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6425 /* Compute address of memory block we save into. We always use pointer
6426 pointing 127 bytes after first byte to store - this is needed to keep
6427 instruction size limited by 4 bytes (5 bytes for AVX) with one
6428 byte displacement. */
6429 tmp_reg = gen_reg_rtx (Pmode);
6430 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6431 plus_constant (save_area,
6432 ix86_varargs_gpr_size + 127)));
6433 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6434 MEM_NOTRAP_P (mem) = 1;
6435 set_mem_alias_set (mem, set);
6436 set_mem_align (mem, BITS_PER_WORD);
6438 /* And finally do the dirty job! */
6439 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6440 GEN_INT (cum->sse_regno), label));
/* Emit the Win64 varargs prologue: spill the remaining named-parameter
   integer registers into their home slots in the caller's shadow area
   above the incoming args pointer.  */
6445 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6447 alias_set_type set = get_varargs_alias_set ();
6450 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6454 mem = gen_rtx_MEM (Pmode,
6455 plus_constant (virtual_incoming_args_rtx,
6456 i * UNITS_PER_WORD));
6457 MEM_NOTRAP_P (mem) = 1;
6458 set_mem_alias_set (mem, set);
6460 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6461 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance a copy of CUM past the
   last named argument (stdarg only), then dispatch to the MS or SysV
   64-bit worker.  NOTE(review): the 32-bit early-return path is elided
   from this view.  */
6466 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6467 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6470 CUMULATIVE_ARGS next_cum;
6473 /* This argument doesn't appear to be used anymore. Which is good,
6474 because the old code here didn't suppress rtl generation. */
6475 gcc_assert (!no_rtl);
6480 fntype = TREE_TYPE (current_function_decl);
6482 /* For varargs, we do not want to skip the dummy va_dcl argument.
6483 For stdargs, we do want to skip the last named argument. */
6485 if (stdarg_p (fntype))
6486 function_arg_advance (&next_cum, mode, type, 1);
6488 if (cum->call_abi == MS_ABI)
6489 setup_incoming_varargs_ms_64 (&next_cum);
6491 setup_incoming_varargs_64 (&next_cum);
6494 /* Checks if TYPE is of kind va_list char *. */
/* Return true if TYPE is the char* flavor of va_list (32-bit always;
   64-bit when the canonical type matches the MS va_list).  */
6497 is_va_list_char_pointer (tree type)
6501 /* For 32-bit it is always true. */
6504 canonic = ix86_canonical_va_list_type (type);
6505 return (canonic == ms_va_list_type_node
6506 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6509 /* Implement va_start. */
/* Expand __builtin_va_start.  For char*-style va_lists use the generic
   expansion; for the SysV 64-bit record, initialize gp_offset,
   fp_offset, overflow_arg_area and reg_save_area from the counts of
   registers consumed by the named arguments.  */
6512 ix86_va_start (tree valist, rtx nextarg)
6514 HOST_WIDE_INT words, n_gpr, n_fpr;
6515 tree f_gpr, f_fpr, f_ovf, f_sav;
6516 tree gpr, fpr, ovf, sav, t;
6519 /* Only 64bit target needs something special. */
6520 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6522 std_expand_builtin_va_start (valist, nextarg);
6526 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6527 f_fpr = TREE_CHAIN (f_gpr);
6528 f_ovf = TREE_CHAIN (f_fpr);
6529 f_sav = TREE_CHAIN (f_ovf);
6531 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6532 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6533 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6534 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6535 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6537 /* Count number of gp and fp argument registers used. */
6538 words = crtl->args.info.words;
6539 n_gpr = crtl->args.info.regno;
6540 n_fpr = crtl->args.info.sse_regno;
6542 if (cfun->va_list_gpr_size)
/* gp_offset = bytes of GPR save area already consumed (8 per reg).  */
6544 type = TREE_TYPE (gpr);
6545 t = build2 (MODIFY_EXPR, type,
6546 gpr, build_int_cst (type, n_gpr * 8));
6547 TREE_SIDE_EFFECTS (t) = 1;
6548 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6551 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset starts past the whole GPR area; 16 bytes per SSE reg.  */
6553 type = TREE_TYPE (fpr);
6554 t = build2 (MODIFY_EXPR, type, fpr,
6555 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6556 TREE_SIDE_EFFECTS (t) = 1;
6557 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6560 /* Find the overflow area. */
6561 type = TREE_TYPE (ovf);
6562 t = make_tree (type, crtl->args.internal_arg_pointer);
6564 t = build2 (POINTER_PLUS_EXPR, type, t,
6565 size_int (words * UNITS_PER_WORD));
6566 t = build2 (MODIFY_EXPR, type, ovf, t);
6567 TREE_SIDE_EFFECTS (t) = 1;
6568 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6570 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6572 /* Find the register save area.
6573 Prologue of the function save it right above stack frame. */
6574 type = TREE_TYPE (sav);
6575 t = make_tree (type, frame_pointer_rtx);
/* With no GPR area saved, bias the pointer so fp_offset still lands
   on the first SSE slot.  */
6576 if (!ix86_varargs_gpr_size)
6577 t = build2 (POINTER_PLUS_EXPR, type, t,
6578 size_int (-8 * X86_64_REGPARM_MAX));
6579 t = build2 (MODIFY_EXPR, type, sav, t);
6580 TREE_SIDE_EFFECTS (t) = 1;
6581 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6585 /* Implement va_arg. */
6588 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6591 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6592 tree f_gpr, f_fpr, f_ovf, f_sav;
6593 tree gpr, fpr, ovf, sav, t;
6595 tree lab_false, lab_over = NULL_TREE;
6600 enum machine_mode nat_mode;
6603 /* Only 64bit target needs something special. */
6604 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6605 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6607 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6608 f_fpr = TREE_CHAIN (f_gpr);
6609 f_ovf = TREE_CHAIN (f_fpr);
6610 f_sav = TREE_CHAIN (f_ovf);
6612 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6613 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6614 valist = build_va_arg_indirect_ref (valist);
6615 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6616 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6617 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6619 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6621 type = build_pointer_type (type);
6622 size = int_size_in_bytes (type);
6623 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6625 nat_mode = type_natural_mode (type);
6634 /* Unnamed 256bit vector mode parameters are passed on stack. */
6635 if (ix86_cfun_abi () == SYSV_ABI)
6642 container = construct_container (nat_mode, TYPE_MODE (type),
6643 type, 0, X86_64_REGPARM_MAX,
6644 X86_64_SSE_REGPARM_MAX, intreg,
6649 /* Pull the value out of the saved registers. */
6651 addr = create_tmp_var (ptr_type_node, "addr");
6652 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6656 int needed_intregs, needed_sseregs;
6658 tree int_addr, sse_addr;
6660 lab_false = create_artificial_label ();
6661 lab_over = create_artificial_label ();
6663 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6665 need_temp = (!REG_P (container)
6666 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6667 || TYPE_ALIGN (type) > 128));
6669 /* In case we are passing structure, verify that it is consecutive block
6670 on the register save area. If not we need to do moves. */
6671 if (!need_temp && !REG_P (container))
6673 /* Verify that all registers are strictly consecutive */
6674 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6678 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6680 rtx slot = XVECEXP (container, 0, i);
6681 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6682 || INTVAL (XEXP (slot, 1)) != i * 16)
6690 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6692 rtx slot = XVECEXP (container, 0, i);
6693 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6694 || INTVAL (XEXP (slot, 1)) != i * 8)
6706 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6707 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6708 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6709 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6712 /* First ensure that we fit completely in registers. */
6715 t = build_int_cst (TREE_TYPE (gpr),
6716 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6717 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6718 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6719 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6720 gimplify_and_add (t, pre_p);
6724 t = build_int_cst (TREE_TYPE (fpr),
6725 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6726 + X86_64_REGPARM_MAX * 8);
6727 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6728 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6729 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6730 gimplify_and_add (t, pre_p);
6733 /* Compute index to start of area used for integer regs. */
6736 /* int_addr = gpr + sav; */
6737 t = fold_convert (sizetype, gpr);
6738 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6739 gimplify_assign (int_addr, t, pre_p);
6743 /* sse_addr = fpr + sav; */
6744 t = fold_convert (sizetype, fpr);
6745 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6746 gimplify_assign (sse_addr, t, pre_p);
6751 tree temp = create_tmp_var (type, "va_arg_tmp");
6754 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6755 gimplify_assign (addr, t, pre_p);
6757 for (i = 0; i < XVECLEN (container, 0); i++)
6759 rtx slot = XVECEXP (container, 0, i);
6760 rtx reg = XEXP (slot, 0);
6761 enum machine_mode mode = GET_MODE (reg);
6762 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6763 tree addr_type = build_pointer_type (piece_type);
6764 tree daddr_type = build_pointer_type_for_mode (piece_type,
6768 tree dest_addr, dest;
6770 if (SSE_REGNO_P (REGNO (reg)))
6772 src_addr = sse_addr;
6773 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6777 src_addr = int_addr;
6778 src_offset = REGNO (reg) * 8;
6780 src_addr = fold_convert (addr_type, src_addr);
6781 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6782 size_int (src_offset));
6783 src = build_va_arg_indirect_ref (src_addr);
6785 dest_addr = fold_convert (daddr_type, addr);
6786 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
6787 size_int (INTVAL (XEXP (slot, 1))));
6788 dest = build_va_arg_indirect_ref (dest_addr);
6790 gimplify_assign (dest, src, pre_p);
6796 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6797 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6798 gimplify_assign (gpr, t, pre_p);
6803 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6804 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6805 gimplify_assign (fpr, t, pre_p);
6808 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6810 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6813 /* ... otherwise out of the overflow area. */
6815 /* When we align parameter on stack for caller, if the parameter
6816 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
6817 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
6818 here with caller. */
6819 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6820 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6821 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6823 /* Care for on-stack alignment if needed. */
6824 if (arg_boundary <= 64
6825 || integer_zerop (TYPE_SIZE (type)))
6829 HOST_WIDE_INT align = arg_boundary / 8;
6830 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6831 size_int (align - 1));
6832 t = fold_convert (sizetype, t);
6833 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6835 t = fold_convert (TREE_TYPE (ovf), t);
6837 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6838 gimplify_assign (addr, t, pre_p);
6840 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6841 size_int (rsize * UNITS_PER_WORD));
6842 gimplify_assign (unshare_expr (ovf), t, pre_p);
6845 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6847 ptrtype = build_pointer_type (type);
6848 addr = fold_convert (ptrtype, addr);
6851 addr = build_va_arg_indirect_ref (addr);
6852 return build_va_arg_indirect_ref (addr);
6855 /* Return nonzero if OPNUM's MEM should be matched
6856 in movabs* patterns. */
/* Validate operand OPNUM of INSN for use in a movabs* pattern: the
   operand must be a (possibly SUBREG-wrapped) MEM, and a volatile MEM
   is only accepted when volatile_ok is set.  */
6859 ix86_check_movabs (rtx insn, int opnum)
/* The pattern may be wrapped in a PARALLEL (e.g. with clobbers);
   the SET of interest is element 0.  */
6863 set = PATTERN (insn);
6864 if (GET_CODE (set) == PARALLEL)
6865 set = XVECEXP (set, 0, 0);
6866 gcc_assert (GET_CODE (set) == SET);
/* Strip any SUBREGs to reach the underlying MEM.  */
6867 mem = XEXP (set, opnum);
6868 while (GET_CODE (mem) == SUBREG)
6869 mem = SUBREG_REG (mem);
6870 gcc_assert (MEM_P (mem));
6871 return (volatile_ok || !MEM_VOLATILE_P (mem));
6874 /* Initialize the table of extra 80387 mathematical constants. */
/* Fill ext_80387_constants_table with the values loaded by the five
   special 80387 constant-load instructions (fldlg2, fldln2, fldl2e,
   fldl2t, fldpi), each rounded to XFmode, and mark the table
   initialized via ext_80387_constants_init.  */
6877 init_ext_80387_constants (void)
6879 static const char * cst[5] =
6881 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6882 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6883 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6884 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6885 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6889 for (i = 0; i < 5; i++)
6891 real_from_string (&ext_80387_constants_table[i], cst[i]);
6892 /* Ensure each constant is rounded to XFmode precision. */
6893 real_convert (&ext_80387_constants_table[i],
6894 XFmode, &ext_80387_constants_table[i]);
6897 ext_80387_constants_init = 1;
6900 /* Return true if the constant is something that can be loaded with
6901 a special instruction. */
/* Classify X as an 80387 special-load constant.  Requires an x87
   float-mode CONST_DOUBLE; recognizes 0.0 and 1.0 directly, then the
   five table constants (only when optimizing for size or the CPU
   benefits), and finally -0.0 / -1.0, which are loaded as a
   fldz;fchs / fld1;fchs pair.  The exact return codes for each case
   are on lines elided from this excerpt.  */
6904 standard_80387_constant_p (rtx x)
6906 enum machine_mode mode = GET_MODE (x);
6910 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
6913 if (x == CONST0_RTX (mode))
6915 if (x == CONST1_RTX (mode))
6918 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6920 /* For XFmode constants, try to find a special 80387 instruction when
6921 optimizing for size or on those CPUs that benefit from them. */
6923 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
/* Lazily build the table of fldlg2/fldln2/fldl2e/fldl2t/fldpi values.  */
6927 if (! ext_80387_constants_init)
6928 init_ext_80387_constants ();
6930 for (i = 0; i < 5; i++)
6931 if (real_identical (&r, &ext_80387_constants_table[i]))
6935 /* Load of the constant -0.0 or -1.0 will be split as
6936 fldz;fchs or fld1;fchs sequence. */
6937 if (real_isnegzero (&r))
6939 if (real_identical (&r, &dconstm1))
6945 /* Return the opcode of the special instruction to be used to load
/* Map the classification returned by standard_80387_constant_p for X
   to the corresponding fld* mnemonic string (cases elided in this
   excerpt).  */
6949 standard_80387_constant_opcode (rtx x)
6951 switch (standard_80387_constant_p (x))
6975 /* Return the CONST_DOUBLE representing the 80387 constant that is
6976 loaded by the specified special instruction. The argument IDX
6977 matches the return value from standard_80387_constant_p. */
/* Return the CONST_DOUBLE for table index IDX (as encoded by
   standard_80387_constant_p), lazily initializing the constant table.
   The IDX->i mapping is on lines elided from this excerpt.  */
6980 standard_80387_constant_rtx (int idx)
6984 if (! ext_80387_constants_init)
6985 init_ext_80387_constants ();
6996 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7005 /* Return 1 if mode is a valid mode for sse. */
/* Predicate used by standard_sse_constant_p; body elided in this
   excerpt — presumably tests for the 128-bit SSE vector modes
   (TODO confirm against full source).  */
7007 standard_sse_mode_p (enum machine_mode mode)
7024 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7025 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7026 modes and AVX is enabled. */
/* Classify X as an SSE special constant: all-zeros yields the value
   from an elided early return; all-ones yields 2 for 128-bit SSE
   modes (negated when SSE2 is unavailable) and 3 for 256-bit AVX
   modes (negated when AVX is unavailable).  */
7029 standard_sse_constant_p (rtx x)
7031 enum machine_mode mode = GET_MODE (x);
7033 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7035 if (vector_all_ones_operand (x, mode))
7037 if (standard_sse_mode_p (mode))
7038 return TARGET_SSE2 ? 2 : -2;
7039 else if (VALID_AVX256_REG_MODE (mode))
7040 return TARGET_AVX ? 3 : -3;
7046 /* Return the opcode of the special instruction to be used to load
/* Return the assembler template that materializes the SSE constant X
   for INSN: xor-with-self variants for all-zeros (AVX "v" forms and
   %x0 forms for wider modes), pcmpeqd-with-self for all-ones.
   Dispatch is on standard_sse_constant_p and the insn's mode
   attribute; the case labels are elided in this excerpt.  */
7050 standard_sse_constant_opcode (rtx insn, rtx x)
7052 switch (standard_sse_constant_p (x))
7055 switch (get_attr_mode (insn))
7058 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7060 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7062 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7064 return "vxorps\t%x0, %x0, %x0";
7066 return "vxorpd\t%x0, %x0, %x0";
7068 return "vpxor\t%x0, %x0, %x0";
7074 switch (get_attr_mode (insn))
7079 return "vpcmpeqd\t%0, %0, %0";
7085 return "pcmpeqd\t%0, %0";
7090 /* Returns 1 if OP contains a symbol reference */
/* Return 1 if OP contains a SYMBOL_REF or LABEL_REF anywhere in its
   RTL tree.  Recurses through both 'e' (expression) and 'E' (vector)
   operands using the rtx format string.  */
7093 symbolic_reference_mentioned_p (rtx op)
7098 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7101 fmt = GET_RTX_FORMAT (GET_CODE (op));
7102 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' operands are rtvecs: scan every element.  */
7108 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7109 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7113 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7120 /* Return 1 if it is appropriate to emit `ret' instructions in the
7121 body of a function. Do this only if the epilogue is simple, needing a
7122 couple of insns. Prior to reloading, we can't tell how many registers
7123 must be saved, so return 0 then. Return 0 if there is no frame
7124 marker to de-allocate. */
/* Return nonzero when a plain `ret' may be emitted: only after reload,
   without a frame pointer, and when the frame layout needs no stack
   allocation and no register saves.  */
7127 ix86_can_use_return_insn_p (void)
7129 struct ix86_frame frame;
7131 if (! reload_completed || frame_pointer_needed)
/* Don't allow more than 32768 bytes of argument pops, since that is
   all a single `ret $N' can handle. */
7136 if (crtl->args.pops_args
7137 && crtl->args.size >= 32768)
7140 ix86_compute_frame_layout (&frame);
7141 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7144 /* Value should be nonzero if functions must have frame pointers.
7145 Zero means the frame pointer need not be set up (and parms may
7146 be accessed via the stack pointer) in functions that seem suitable. */
/* Decide whether the current function must set up %ebp/%rbp as a frame
   pointer; returns for the individual cases are on elided lines.  */
7149 ix86_frame_pointer_required (void)
7151 /* If we accessed previous frames, then the generated code expects
7152 to be able to access the saved ebp value in our frame. */
7153 if (cfun->machine->accesses_prev_frame)
7156 /* Several x86 os'es need a frame pointer for other reasons,
7157 usually pertaining to setjmp. */
7158 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7161 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7162 the frame pointer by default. Turn it back on now if we've not
7163 got a leaf function. */
7164 if (TARGET_OMIT_LEAF_FRAME_POINTER
7165 && (!current_function_is_leaf
7166 || ix86_current_function_calls_tls_descriptor))
7175 /* Record that the current function accesses previous call frames. */
/* Record that the current function accesses previous call frames
   (e.g. via __builtin_frame_address); consulted by
   ix86_frame_pointer_required above.  */
7178 ix86_setup_frame_addresses (void)
7180 cfun->machine->accesses_prev_frame = 1;
7183 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7184 # define USE_HIDDEN_LINKONCE 1
7186 # define USE_HIDDEN_LINKONCE 0
7189 static int pic_labels_used;
7191 /* Fills in the label name that should be used for a pc thunk for
7192 the given register. */
/* Compose into NAME (a 32-byte buffer) the label for the 32-bit-only
   PC-load thunk of register REGNO: the well-known
   "__i686.get_pc_thunk.<reg>" when hidden linkonce sections are
   usable, otherwise an internal "LPR" label.  */
7195 get_pc_thunk_name (char name[32], unsigned int regno)
7197 gcc_assert (!TARGET_64BIT);
7199 if (USE_HIDDEN_LINKONCE)
7200 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7202 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7206 /* This function generates code for -fpic that loads %ebx with
7207 the return address of the caller and then returns. */
/* File-end hook: for every register whose bit is set in
   pic_labels_used, emit its get-pc thunk — a two-insn function that
   moves the return address from (%esp) into the register and
   returns.  Handles Mach-O weak/coalesced sections, hidden-linkonce
   ELF sections, and a plain text-section fallback.  Finally emits the
   .note.GNU-stack marker where required.  */
7210 ix86_file_end (void)
7215 for (regno = 0; regno < 8; ++regno)
7219 if (! ((pic_labels_used >> regno) & 1))
7222 get_pc_thunk_name (name, regno);
/* Mach-O variant: weak, coalesced, private-extern definition.  */
7227 switch_to_section (darwin_sections[text_coal_section]);
7228 fputs ("\t.weak_definition\t", asm_out_file);
7229 assemble_name (asm_out_file, name);
7230 fputs ("\n\t.private_extern\t", asm_out_file);
7231 assemble_name (asm_out_file, name);
7232 fputs ("\n", asm_out_file);
7233 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF variant: one-only section with hidden visibility so the
   linker folds duplicate thunks across objects.  */
7237 if (USE_HIDDEN_LINKONCE)
7241 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7243 TREE_PUBLIC (decl) = 1;
7244 TREE_STATIC (decl) = 1;
7245 DECL_ONE_ONLY (decl) = 1;
7247 (*targetm.asm_out.unique_section) (decl, 0);
7248 switch_to_section (get_named_section (decl, NULL, 0));
7250 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7251 fputs ("\t.hidden\t", asm_out_file);
7252 assemble_name (asm_out_file, name);
7253 fputc ('\n', asm_out_file);
7254 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: plain local label in the text section.  */
7258 switch_to_section (text_section);
7259 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: mov (%esp), %reg; ret.  */
7262 xops[0] = gen_rtx_REG (Pmode, regno);
7263 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7264 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7265 output_asm_insn ("ret", xops);
7268 if (NEED_INDICATE_EXEC_STACK)
7269 file_end_indicate_exec_stack ();
7272 /* Emit code for the SET_GOT patterns. */
/* Emit the assembly for a SET_GOT pattern: load the GOT base address
   into DEST.  Three strategies: VxWorks RTP reads it from the GOTT
   tables; without deep branch prediction a call-to-next-insn/pop pair
   captures the PC; otherwise a shared get-pc thunk is called.  In the
   non-VxWorks cases the GOT offset is then added to the captured PC.  */
7275 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7281 if (TARGET_VXWORKS_RTP && flag_pic)
7283 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7284 xops[2] = gen_rtx_MEM (Pmode,
7285 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7286 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7288 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7289 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7290 an unadorned address. */
7291 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7292 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7293 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7297 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Call/pop sequence: capture the PC by calling the very next
   instruction and popping the pushed return address into DEST.  */
7299 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7301 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7304 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7306 output_asm_insn ("call\t%a2", xops);
7309 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7310 is what will be referenced by the Mach-O PIC subsystem. */
7312 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7315 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7316 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7319 output_asm_insn ("pop%z0\t%0", xops);
/* Thunk sequence: call the per-register get-pc thunk and record that
   the thunk must be emitted at file end (see ix86_file_end).  */
7324 get_pc_thunk_name (name, REGNO (dest));
7325 pic_labels_used |= 1 << REGNO (dest);
7327 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7328 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7329 output_asm_insn ("call\t%X2", xops);
7330 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7331 is what will be referenced by the Mach-O PIC subsystem. */
7334 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7336 targetm.asm_out.internal_label (asm_out_file, "L",
7337 CODE_LABEL_NUMBER (label));
/* Finally add the GOT offset to the captured PC.  */
7344 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7345 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7347 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7352 /* Generate an "push" pattern for input ARG. */
7357 return gen_rtx_SET (VOIDmode,
7359 gen_rtx_PRE_DEC (Pmode,
7360 stack_pointer_rtx)),
7364 /* Return >= 0 if there is an unused call-clobbered register available
7365 for the entire function. */
/* In a leaf, unprofiled function that makes no TLS-descriptor calls,
   look for an unused call-clobbered register (%eax/%ecx/%edx, scanned
   2..0) that could hold the PIC base instead of %ebx; return
   INVALID_REGNUM when none qualifies.  */
7368 ix86_select_alt_pic_regnum (void)
7370 if (current_function_is_leaf && !crtl->profile
7371 && !ix86_current_function_calls_tls_descriptor)
7374 /* Can't use the same register for both PIC and DRAP. */
7376 drap = REGNO (crtl->drap_reg);
7379 for (i = 2; i >= 0; --i)
7380 if (i != drap && !df_regs_ever_live_p (i))
7384 return INVALID_REGNUM;
7387 /* Return 1 if we need to save REGNO. */
/* Return 1 if REGNO must be saved in the prologue.  Special cases: the
   PIC register (unless an alternate PIC register is available), the
   EH return data registers when MAYBE_EH_RETURN, and the DRAP
   register (guard partly elided); otherwise any live, non-call-used,
   non-fixed register, excluding the hard frame pointer when it is
   already set up by the frame code.  */
7389 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7391 if (pic_offset_table_rtx
7392 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7393 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7395 || crtl->calls_eh_return
7396 || crtl->uses_const_pool))
7398 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* Walk EH_RETURN_DATA_REGNO until the INVALID_REGNUM sentinel.  */
7403 if (crtl->calls_eh_return && maybe_eh_return)
7408 unsigned test = EH_RETURN_DATA_REGNO (i);
7409 if (test == INVALID_REGNUM)
/* NOTE(review): the condition introducing this DRAP check is on an
   elided line.  */
7417 && regno == REGNO (crtl->drap_reg))
7420 return (df_regs_ever_live_p (regno)
7421 && !call_used_regs[regno]
7422 && !fixed_regs[regno]
7423 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7426 /* Return number of saved general purpose registers. */
/* Count the general-purpose registers that ix86_save_reg says must be
   saved (SSE registers are counted separately below).  */
7429 ix86_nsaved_regs (void)
7434 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7435 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7440 /* Return number of saved SSE registers. */
/* Count the SSE registers that must be saved.  Only the MS ABI has
   call-saved SSE registers, so anything else returns early (return on
   an elided line).  */
7443 ix86_nsaved_sseregs (void)
7448 if (ix86_cfun_abi () != MS_ABI)
7450 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7451 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7456 /* Given FROM and TO register numbers, say whether this elimination is
7457 allowed. If stack alignment is needed, we can only replace argument
7458 pointer with hard frame pointer, or replace frame pointer with stack
7459 pointer. Otherwise, frame pointer elimination is automatically
7460 handled and all other eliminations are valid. */
/* Register-elimination predicate.  When the frame pointer itself is
   realigned (stack_realign_fp), only argp->hard-fp and fp->sp remain
   valid; otherwise elimination to the stack pointer is allowed exactly
   when no frame pointer is needed.  */
7463 ix86_can_eliminate (int from, int to)
7465 if (stack_realign_fp)
7466 return ((from == ARG_POINTER_REGNUM
7467 && to == HARD_FRAME_POINTER_REGNUM)
7468 || (from == FRAME_POINTER_REGNUM
7469 && to == STACK_POINTER_REGNUM));
7471 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7474 /* Return the offset between two registers, one to be eliminated, and the other
7475 its replacement, at the start of a routine. */
/* Return the fixed offset between registers FROM and TO at function
   entry, derived from the computed frame layout.  */
7478 ix86_initial_elimination_offset (int from, int to)
7480 struct ix86_frame frame;
7481 ix86_compute_frame_layout (&frame);
7483 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7484 return frame.hard_frame_pointer_offset;
7485 else if (from == FRAME_POINTER_REGNUM
7486 && to == HARD_FRAME_POINTER_REGNUM)
7487 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer.  */
7490 gcc_assert (to == STACK_POINTER_REGNUM);
7492 if (from == ARG_POINTER_REGNUM)
7493 return frame.stack_pointer_offset;
7495 gcc_assert (from == FRAME_POINTER_REGNUM);
7496 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7500 /* In a dynamically-aligned function, we can't know the offset from
7501 stack pointer to frame pointer, so we must ensure that setjmp
7502 eliminates fp against the hard fp (%ebp) rather than trying to
7503 index from %esp up to the top of the frame across a gap that is
7504 of unknown (at compile-time) size. */
/* For setjmp, base the saved frame value on the hard frame pointer
   when the stack is dynamically realigned (the sp->fp gap is unknown
   at compile time); otherwise use the virtual stack-vars pointer.  */
7506 ix86_builtin_setjmp_frame_value (void)
7508 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7511 /* Fill structure ix86_frame about frame of currently computed function. */
/* Compute the complete stack-frame layout for the current function
   into *FRAME: saved-register counts, the padding fields, the frame /
   hard-frame / stack pointer offsets, the prologue allocation size,
   and the red-zone adjustment.  Also decides between push-based and
   mov-based register saving (save_regs_using_mov).  */
7514 ix86_compute_frame_layout (struct ix86_frame *frame)
7516 HOST_WIDE_INT total_size;
7517 unsigned int stack_alignment_needed;
7518 HOST_WIDE_INT offset;
7519 unsigned int preferred_alignment;
7520 HOST_WIDE_INT size = get_frame_size ();
7522 frame->nregs = ix86_nsaved_regs ();
7523 frame->nsseregs = ix86_nsaved_sseregs ();
/* Alignments below are tracked in bytes; crtl stores bits.  */
7526 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7527 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7529 /* MS ABI seems to require stack alignment to be always 16 except for
   function prologues (rest of this comment elided in the excerpt). */
7531 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7533 preferred_alignment = 16;
7534 stack_alignment_needed = 16;
7535 crtl->preferred_stack_boundary = 128;
7536 crtl->stack_alignment_needed = 128;
7539 gcc_assert (!size || stack_alignment_needed);
7540 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7541 gcc_assert (preferred_alignment <= stack_alignment_needed);
7543 /* During reload iteration the amount of registers saved can change.
7544 Recompute the value as needed. Do not recompute when amount of registers
7545 didn't change as reload does multiple calls to the function and does not
7546 expect the decision to change within single iteration. */
7547 if (!optimize_function_for_size_p (cfun)
7548 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7550 int count = frame->nregs;
7552 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7553 /* The fast prologue uses move instead of push to save registers. This
7554 is significantly longer, but also executes faster as modern hardware
7555 can execute the moves in parallel, but can't do that for push/pop.
7557 Be careful about choosing what prologue to emit: When function takes
7558 many instructions to execute we may use slow version as well as in
7559 case function is known to be outside hot spot (this is known with
7560 feedback only). Weight the size of function by number of registers
7561 to save as it is cheap to use one or two push instructions but very
7562 slow to use many of them. */
7564 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7565 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7566 || (flag_branch_probabilities
7567 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7568 cfun->machine->use_fast_prologue_epilogue = false;
7570 cfun->machine->use_fast_prologue_epilogue
7571 = !expensive_function_p (count);
7573 if (TARGET_PROLOGUE_USING_MOVE
7574 && cfun->machine->use_fast_prologue_epilogue)
7575 frame->save_regs_using_mov = true;
7577 frame->save_regs_using_mov = false;
7580 /* Skip return address and saved base pointer. */
7581 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7583 frame->hard_frame_pointer_offset = offset;
7585 /* Set offset to aligned because the realigned frame starts from
   an aligned boundary (continuation elided in this excerpt). */
7587 if (stack_realign_fp)
7588 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7590 /* Register save area */
7591 offset += frame->nregs * UNITS_PER_WORD;
7593 /* Align SSE reg save area. */
7594 if (frame->nsseregs)
7595 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7597 frame->padding0 = 0;
7599 /* SSE register save area. */
7600 offset += frame->padding0 + frame->nsseregs * 16;
/* Register-save area for varargs (both GPR and FPR portions).  */
7603 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7604 offset += frame->va_arg_size;
7606 /* Align start of frame for local function. */
7607 frame->padding1 = ((offset + stack_alignment_needed - 1)
7608 & -stack_alignment_needed) - offset;
7610 offset += frame->padding1;
7612 /* Frame pointer points here. */
7613 frame->frame_pointer_offset = offset;
7617 /* Add outgoing arguments area. Can be skipped if we eliminated
7618 all the function calls as dead code.
7619 Skipping is however impossible when function calls alloca. Alloca
7620 expander assumes that last crtl->outgoing_args_size
7621 of stack frame are unused. */
7622 if (ACCUMULATE_OUTGOING_ARGS
7623 && (!current_function_is_leaf || cfun->calls_alloca
7624 || ix86_current_function_calls_tls_descriptor))
7626 offset += crtl->outgoing_args_size;
7627 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7630 frame->outgoing_arguments_size = 0;
7632 /* Align stack boundary. Only needed if we're calling another function
   or using alloca (continuation elided in this excerpt). */
7634 if (!current_function_is_leaf || cfun->calls_alloca
7635 || ix86_current_function_calls_tls_descriptor)
7636 frame->padding2 = ((offset + preferred_alignment - 1)
7637 & -preferred_alignment) - offset;
7639 frame->padding2 = 0;
7641 offset += frame->padding2;
7643 /* We've reached end of stack frame. */
7644 frame->stack_pointer_offset = offset;
7646 /* Size prologue needs to allocate. */
7647 frame->to_allocate =
7648 (size + frame->padding1 + frame->padding2
7649 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Mov-based saving is pointless for tiny frames and unusable for
   64-bit frames whose size does not fit a 32-bit displacement.  */
7651 if ((!frame->to_allocate && frame->nregs <= 1)
7652 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7653 frame->save_regs_using_mov = false;
/* Leaf functions on red-zone targets may use the area below the
   stack pointer instead of allocating it.  */
7655 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7656 && current_function_is_leaf
7657 && !ix86_current_function_calls_tls_descriptor)
7659 frame->red_zone_size = frame->to_allocate;
7660 if (frame->save_regs_using_mov)
7661 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7662 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7663 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7666 frame->red_zone_size = 0;
7667 frame->to_allocate -= frame->red_zone_size;
7668 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard for this dump is on an
   elided line).  */
7670 fprintf (stderr, "\n");
7671 fprintf (stderr, "size: %ld\n", (long)size);
7672 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7673 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7674 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7675 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7676 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7677 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7678 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7679 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7680 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7681 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7682 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7683 (long)frame->hard_frame_pointer_offset);
7684 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7685 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7686 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7687 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7691 /* Emit code to save registers in the prologue. */
/* Emit frame-related push insns for every general-purpose register
   that must be saved, iterating from high regno to low so pushes land
   in a fixed order.  */
7694 ix86_emit_save_regs (void)
7699 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7700 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7702 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7703 RTX_FRAME_RELATED_P (insn) = 1;
7707 /* Emit code to save registers using MOV insns. First register
7708 is stored at POINTER + OFFSET. */
/* Save the general-purpose registers with MOV insns instead of pushes:
   each saved register is stored at POINTER + OFFSET, advancing OFFSET
   by a word per register.  Stores are marked frame-related for the
   unwinder.  */
7710 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7715 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7716 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7718 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7720 gen_rtx_REG (Pmode, regno));
7721 RTX_FRAME_RELATED_P (insn) = 1;
7722 offset += UNITS_PER_WORD;
7726 /* Emit code to save SSE registers using MOV insns. First register
7727 is stored at POINTER + OFFSET. */
/* Save the call-saved SSE registers with TImode MOV insns starting at
   POINTER + OFFSET; each slot is marked 128-bit aligned (the layout
   code aligned the area to 16 bytes).  */
7729 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7735 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7736 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7738 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7739 set_mem_align (mem, 128);
7740 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7741 RTX_FRAME_RELATED_P (insn) = 1;
7746 /* Expand prologue or epilogue stack adjustment.
7747 The pattern exist to put a dependency on all ebp-based memory accesses.
7748 STYLE should be negative if instructions should be marked as frame related,
7749 zero if %r11 register is live and cannot be freely used and positive
/* Emit DEST = SRC + OFFSET for prologue/epilogue stack adjustment.
   Small/immediate offsets use the single-insn patterns; a 64-bit
   offset that is not a valid immediate is first loaded into %r11
   (only safe per the STYLE contract described below).  Insns are
   marked frame-related according to STYLE (guards partly elided).  */
7753 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7758 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7759 else if (x86_64_immediate_operand (offset, DImode))
7760 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7764 /* r11 is used by indirect sibcall return as well, set before the
7765 epilogue and used after the epilogue. ATM indirect sibcall
7766 shouldn't be used together with huge frame sizes in one
7767 function because of the frame_size check in sibcall.c. */
7769 r11 = gen_rtx_REG (DImode, R11_REG);
7770 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7772 RTX_FRAME_RELATED_P (insn) = 1;
7773 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7777 RTX_FRAME_RELATED_P (insn) = 1;
7780 /* Find an available register to be used as dynamic realign argument
7781 pointer register. Such a register will be written in prologue and
7782 used in begin of body, so it must not be
7783 1. parameter passing register.
7785 We reuse static-chain register if it is available. Otherwise, we
7786 use DI for i386 and R13 for x86-64. We chose R13 since it has
7789 Return: the regno of chosen register. */
/* Choose the hard register to use as the dynamic realign argument
   pointer (DRAP).  A call-saved register (R13 on 64-bit, DI otherwise)
   is required for nested functions, functions needing a static chain,
   or functions that emit tail calls; otherwise the static-chain
   register (%ecx) is reused when it is free of parameter-passing and
   fastcall duty.  The actual return statements are on elided lines.  */
7792 find_drap_reg (void)
7794 tree decl = cfun->decl;
7798 /* Use R13 for a nested function or a function that needs a static
chain.  Since a function with tail calls may use any caller-saved
register in the epilogue, DRAP must not use a caller-saved
register in that case. */
7802 if ((decl_function_context (decl)
7803 && !DECL_NO_STATIC_CHAIN (decl))
7804 || crtl->tail_call_emit)
7811 /* Use DI for a nested function or a function that needs a static
chain.  Since a function with tail calls may use any caller-saved
register in the epilogue, DRAP must not use a caller-saved
register in that case. */
7815 if ((decl_function_context (decl)
7816 && !DECL_NO_STATIC_CHAIN (decl))
7817 || crtl->tail_call_emit)
7820 /* Reuse static chain register if it isn't used for parameter
passing (continuation elided in this excerpt). */
7822 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7823 && !lookup_attribute ("fastcall",
7824 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7831 /* Update incoming stack boundary and estimated stack alignment. */
/* Recompute ix86_incoming_stack_boundary for the current function and
   bump the estimated stack alignment for 64-bit varargs.  The chosen
   boundary starts from the user/default value, is lowered by the
   force_align_arg_pointer attribute and for main(), and is raised to
   at least the parameter stack boundary.  */
7834 ix86_update_stack_boundary (void)
7836 /* Prefer the one specified at command line. */
7837 ix86_incoming_stack_boundary
7838 = (ix86_user_incoming_stack_boundary
7839 ? ix86_user_incoming_stack_boundary
7840 : ix86_default_incoming_stack_boundary);
7842 /* Incoming stack alignment can be changed on individual functions
7843 via force_align_arg_pointer attribute. We use the smallest
7844 incoming stack boundary. */
7845 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7846 && lookup_attribute (ix86_force_align_arg_pointer_string,
7847 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7848 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7850 /* The incoming stack frame has to be aligned at least at
7851 parm_stack_boundary. */
7852 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
7853 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
7855 /* Stack at entrance of main is aligned by runtime. We use the
7856 smallest incoming stack boundary. */
7857 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7858 && DECL_NAME (current_function_decl)
7859 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7860 && DECL_FILE_SCOPE_P (current_function_decl))
7861 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7863 /* x86_64 vararg needs 16byte stack alignment for register save
area (guard for this case is on an elided line). */
7867 && crtl->stack_alignment_estimated < 128)
7868 crtl->stack_alignment_estimated = 128;
7871 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7872 needed or an rtx for DRAP otherwise. */
/* TARGET_GET_DRAP_RTX hook.  Marks DRAP as needed when forced or when
   outgoing args are not accumulated; when the DRAP realignment is
   active, picks a DRAP register, copies it into a pseudo (vDRAP) right
   after function entry, and returns that pseudo (return statements on
   elided lines; NULL otherwise per the header comment above).  */
7875 ix86_get_drap_rtx (void)
7877 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
7878 crtl->need_drap = true;
7880 if (stack_realign_drap)
7882 /* Assign DRAP to vDRAP and returns vDRAP */
7883 unsigned int regno = find_drap_reg ();
7888 arg_ptr = gen_rtx_REG (Pmode, regno);
7889 crtl->drap_reg = arg_ptr;
/* Copy into a pseudo and insert the copy just after function entry.  */
7892 drap_vreg = copy_to_reg (arg_ptr);
7896 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7897 RTX_FRAME_RELATED_P (insn) = 1;
7904 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* TARGET_INTERNAL_ARG_POINTER hook: incoming arguments are always
   addressed via the virtual incoming-args pointer.  */
7907 ix86_internal_arg_pointer (void)
7909 return virtual_incoming_args_rtx;
7912 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7913 This is called from dwarf2out.c to emit call frame instructions
7914 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
/* TARGET_DWARF_HANDLE_FRAME_UNSPEC hook: translate a frame-related
   UNSPEC in PATTERN's SET_SRC into the matching dwarf2out call —
   register-save notes for UNSPEC_REG_SAVE, CFA definitions for
   UNSPEC_DEF_CFA (switch head and remaining cases elided).  */
7916 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7918 rtx unspec = SET_SRC (pattern);
7919 gcc_assert (GET_CODE (unspec) == UNSPEC);
7923 case UNSPEC_REG_SAVE:
7924 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7925 SET_DEST (pattern));
7927 case UNSPEC_DEF_CFA:
7928 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7929 INTVAL (XVECEXP (unspec, 0, 0)));
7936 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
7937 to be generated in correct form. */
/* Compute the final stack_realign_needed decision after reload and
   latch it in crtl; once finalized the decision must never change, so
   a repeat call merely asserts consistency.  */
7939 ix86_finalize_stack_realign_flags (void)
7941 /* Check if stack realign is really needed after reload, and
7942 stores result in cfun */
7943 unsigned int incoming_stack_boundary
7944 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7945 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Leaf functions only need the alignment actually used by stack
   slots; others must honor the full required alignment.  */
7946 unsigned int stack_realign = (incoming_stack_boundary
7947 < (current_function_is_leaf
7948 ? crtl->max_used_stack_slot_alignment
7949 : crtl->stack_alignment_needed));
7951 if (crtl->stack_realign_finalized)
7953 /* After stack_realign_needed is finalized, we can no longer
change it. */
7955 gcc_assert (crtl->stack_realign_needed == stack_realign);
7959 crtl->stack_realign_needed = stack_realign;
7960 crtl->stack_realign_finalized = true;
7964 /* Expand the prologue into a bunch of separate insns. */
/* Emits RTL for the function prologue: optional DRAP setup and stack
   realignment, frame-pointer establishment, register saves (push or
   mov), stack-frame allocation (direct adjust or probed via a worker
   for large frames), PIC register setup, and vDRAP recovery.  All
   frame-related insns are marked RTX_FRAME_RELATED_P for DWARF CFI.
   NOTE(review): this extract elides many interleaved source lines
   (braces, declarations, else-arms); code lines are byte-identical.  */
7967 ix86_expand_prologue (void)
7971 struct ix86_frame frame;
7972 HOST_WIDE_INT allocate;
7974 ix86_finalize_stack_realign_flags ();
7976 /* DRAP should not coexist with stack_realign_fp. */
7977 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7979 ix86_compute_frame_layout (&frame);
7981 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
7982 DRAP is needed and stack realignment is really needed after reload. */
7983 if (crtl->drap_reg && crtl->stack_realign_needed)
7986 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
/* Extra word if the DRAP register is callee-saved and must be pushed.  */
7987 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
7988 ? 0 : UNITS_PER_WORD);
7990 gcc_assert (stack_realign_drap);
7992 /* Grab the argument pointer. */
7993 x = plus_constant (stack_pointer_rtx,
7994 (UNITS_PER_WORD + param_ptr_offset));
7997 /* Only need to push parameter pointer reg if it is caller
7999 if (!call_used_regs[REGNO (crtl->drap_reg)])
8001 /* Push arg pointer reg. */
8002 insn = emit_insn (gen_push (y));
8003 RTX_FRAME_RELATED_P (insn) = 1;
8006 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8007 RTX_FRAME_RELATED_P (insn) = 1;
8009 /* Align the stack. */
8010 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8012 GEN_INT (-align_bytes)));
8013 RTX_FRAME_RELATED_P (insn) = 1;
8015 /* Replicate the return address on the stack so that return
8016 address can be reached via (argp - 1) slot. This is needed
8017 to implement macro RETURN_ADDR_RTX and intrinsic function
8018 expand_builtin_return_addr etc. */
8020 x = gen_frame_mem (Pmode,
8021 plus_constant (x, -UNITS_PER_WORD));
8022 insn = emit_insn (gen_push (x));
8023 RTX_FRAME_RELATED_P (insn) = 1;
8026 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8027 slower on all targets. Also sdb doesn't like it. */
8029 if (frame_pointer_needed)
8031 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8032 RTX_FRAME_RELATED_P (insn) = 1;
8034 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8035 RTX_FRAME_RELATED_P (insn) = 1;
/* Realigning off the frame pointer (no DRAP): and-mask %esp/%rsp.  */
8038 if (stack_realign_fp)
8040 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8041 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8043 /* Align the stack. */
8044 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8046 GEN_INT (-align_bytes)));
8047 RTX_FRAME_RELATED_P (insn) = 1;
8050 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
/* Save registers with pushes when mov-based saves were not chosen.  */
8052 if (!frame.save_regs_using_mov)
8053 ix86_emit_save_regs ();
8055 allocate += frame.nregs * UNITS_PER_WORD;
8057 /* When using red zone we may start register saving before allocating
8058 the stack frame saving one cycle of the prologue. However I will
8059 avoid doing this if I am going to have to probe the stack since
8060 at least on x86_64 the stack probe can turn into a call that clobbers
8061 a red zone location. */
8062 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8063 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8064 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8065 && !crtl->stack_realign_needed)
8066 ? hard_frame_pointer_rtx
8067 : stack_pointer_rtx,
8068 -frame.nregs * UNITS_PER_WORD);
/* Small frames: a plain stack-pointer adjustment suffices.  */
8072 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8073 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8074 GEN_INT (-allocate), -1);
/* Large frames needing probing: allocate via a worker taking the size
   in %eax/%rax.  */
8077 /* Only valid for Win32. */
8078 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8082 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8084 if (cfun->machine->call_abi == MS_ABI)
/* eax may hold an incoming argument; preserve it around the probe.  */
8087 eax_live = ix86_eax_live_at_start_p ();
8091 emit_insn (gen_push (eax));
8092 allocate -= UNITS_PER_WORD;
8095 emit_move_insn (eax, GEN_INT (allocate));
8098 insn = gen_allocate_stack_worker_64 (eax, eax);
8100 insn = gen_allocate_stack_worker_32 (eax, eax);
8101 insn = emit_insn (insn);
8102 RTX_FRAME_RELATED_P (insn) = 1;
/* Describe the net sp adjustment to the unwinder explicitly, since the
   worker call hides it.  */
8103 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8104 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8105 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8106 t, REG_NOTES (insn));
/* Restore the saved eax value from its stack slot.  */
8110 if (frame_pointer_needed)
8111 t = plus_constant (hard_frame_pointer_rtx,
8114 - frame.nregs * UNITS_PER_WORD);
8116 t = plus_constant (stack_pointer_rtx, allocate);
8117 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Mov-based register saves when not already done before allocation.  */
8121 if (frame.save_regs_using_mov
8122 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8123 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8125 if (!frame_pointer_needed
8126 || !frame.to_allocate
8127 || crtl->stack_realign_needed)
8128 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8130 + frame.nsseregs * 16 + frame.padding0);
8132 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8133 -frame.nregs * UNITS_PER_WORD);
8135 if (!frame_pointer_needed
8136 || !frame.to_allocate
8137 || crtl->stack_realign_needed)
8138 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8141 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8142 - frame.nregs * UNITS_PER_WORD
8143 - frame.nsseregs * 16
/* Set up the PIC register when this function references the GOT.  */
8146 pic_reg_used = false;
8147 if (pic_offset_table_rtx
8148 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8151 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8153 if (alt_pic_reg_used != INVALID_REGNUM)
8154 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8156 pic_reg_used = true;
8163 if (ix86_cmodel == CM_LARGE_PIC)
8165 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8166 rtx label = gen_label_rtx ();
8168 LABEL_PRESERVE_P (label) = 1;
8169 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8170 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8171 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8172 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8173 pic_offset_table_rtx, tmp_reg));
8176 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8179 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8182 /* Prevent function calls from being scheduled before the call to mcount.
8183 In the pic_reg_used case, make sure that the got load isn't deleted. */
8187 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8188 emit_insn (gen_blockage ());
8191 if (crtl->drap_reg && !crtl->stack_realign_needed)
8193 /* vDRAP is setup but after reload it turns out stack realign
8194 isn't necessary, here we will emit prologue to setup DRAP
8195 without stack realign adjustment. */
8196 int drap_bp_offset = UNITS_PER_WORD * 2;
8197 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8198 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8201 /* Emit cld instruction if stringops are used in the function. */
8202 if (TARGET_CLD && ix86_current_function_needs_cld)
8203 emit_insn (gen_cld ());
8206 /* Emit code to restore saved registers using MOV insns. First register
8207 is restored from POINTER + OFFSET. */
/* MAYBE_EH_RETURN is forwarded to ix86_save_reg to select the set of
   registers relevant on the eh_return path.  Walks all hard registers
   and reloads each saved integer (non-SSE) register from consecutive
   word slots starting at POINTER + OFFSET.  */
8209 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8210 int maybe_eh_return)
8213 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8215 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8216 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8218 /* Ensure that adjust_address won't be forced to produce pointer
8219 out of range allowed by x86-64 instruction set. */
8220 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset does not fit in a 32-bit displacement: materialize the
   absolute address in r11 and index from there instead.  */
8224 r11 = gen_rtx_REG (DImode, R11_REG);
8225 emit_move_insn (r11, GEN_INT (offset));
8226 emit_insn (gen_adddi3 (r11, r11, pointer));
8227 base_address = gen_rtx_MEM (Pmode, r11);
8230 emit_move_insn (gen_rtx_REG (Pmode, regno),
8231 adjust_address (base_address, Pmode, offset));
8232 offset += UNITS_PER_WORD;
8236 /* Emit code to restore saved SSE registers using MOV insns. First register
8237 is restored from POINTER + OFFSET. */
/* SSE counterpart of ix86_emit_restore_regs_using_mov: reloads each
   saved SSE register from consecutive 16-byte, 128-bit-aligned slots
   starting at POINTER + OFFSET.  */
8239 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8240 int maybe_eh_return)
8243 rtx base_address = gen_rtx_MEM (TImode, pointer);
8246 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8247 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8249 /* Ensure that adjust_address won't be forced to produce pointer
8250 out of range allowed by x86-64 instruction set. */
8251 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset exceeds the 32-bit displacement range: go via r11.  */
8255 r11 = gen_rtx_REG (DImode, R11_REG);
8256 emit_move_insn (r11, GEN_INT (offset));
8257 emit_insn (gen_adddi3 (r11, r11, pointer));
8258 base_address = gen_rtx_MEM (TImode, r11);
/* Save slots are 16-byte aligned, so the aligned load form is safe.  */
8261 mem = adjust_address (base_address, TImode, offset);
8262 set_mem_align (mem, 128);
8263 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8268 /* Restore function stack, frame, and registers. */
/* STYLE selects the epilogue flavor: the code below distinguishes
   style == 2 (eh_return path, per the comparisons visible here) and
   also tests STYLE for sibcall handling on elided lines.  Chooses
   between mov-based restores plus leave/pop, or pop-based restores,
   then handles DRAP unwinding and the final return (including
   stdcall/pascal argument popping).
   NOTE(review): this extract elides many interleaved source lines;
   code lines are kept byte-identical.  */
8271 ix86_expand_epilogue (int style)
8275 struct ix86_frame frame;
8276 HOST_WIDE_INT offset;
8278 ix86_finalize_stack_realign_flags ();
8280 /* When stack is realigned, SP must be valid. */
8281 sp_valid = (!frame_pointer_needed
8282 || current_function_sp_is_unchanging
8283 || stack_realign_fp);
8285 ix86_compute_frame_layout (&frame);
8287 /* Calculate start of saved registers relative to ebp. Special care
8288 must be taken for the normal return case of a function using
8289 eh_return: the eax and edx registers are marked as saved, but not
8290 restored along this path. */
8291 offset = frame.nregs;
8292 if (crtl->calls_eh_return && style != 2)
8294 offset *= -UNITS_PER_WORD;
8295 offset -= frame.nsseregs * 16 + frame.padding0;
8297 /* If we're only restoring one register and sp is not valid then
8298 using a move instruction to restore the register since it's
8299 less work than reloading sp and popping the register.
8301 The default code result in stack adjustment using add/lea instruction,
8302 while this code results in LEAVE instruction (or discrete equivalent),
8303 so it is profitable in some other cases as well. Especially when there
8304 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8305 and there is exactly one register to pop. This heuristic may need some
8306 tuning in future. */
8307 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8308 || (TARGET_EPILOGUE_USING_MOVE
8309 && cfun->machine->use_fast_prologue_epilogue
8310 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8311 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8312 || (frame_pointer_needed && TARGET_USE_LEAVE
8313 && cfun->machine->use_fast_prologue_epilogue
8314 && (frame.nregs + frame.nsseregs) == 1)
8315 || crtl->calls_eh_return)
8317 /* Restore registers. We can use ebp or esp to address the memory
8318 locations. If both are available, default to ebp, since offsets
8319 are known to be small. Only exception is esp pointing directly
8320 to the end of block of saved registers, where we may simplify
8323 If we are realigning stack with bp and sp, regs restore can't
8324 be addressed by bp. sp must be used instead. */
8326 if (!frame_pointer_needed
8327 || (sp_valid && !frame.to_allocate)
8328 || stack_realign_fp)
8330 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8331 frame.to_allocate, style == 2);
8332 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8334 + frame.nsseregs * 16
8335 + frame.padding0, style == 2);
8339 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8340 offset, style == 2);
8341 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8343 + frame.nsseregs * 16
8344 + frame.padding0, style == 2);
8347 /* eh_return epilogues need %ecx added to the stack pointer. */
8350 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8352 /* Stack align doesn't work with eh_return. */
8353 gcc_assert (!crtl->stack_realign_needed);
8355 if (frame_pointer_needed)
8357 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8358 tmp = plus_constant (tmp, UNITS_PER_WORD);
8359 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8361 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8362 emit_move_insn (hard_frame_pointer_rtx, tmp);
8364 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8369 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8370 tmp = plus_constant (tmp, (frame.to_allocate
8371 + frame.nregs * UNITS_PER_WORD
8372 + frame.nsseregs * 16
8374 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8377 else if (!frame_pointer_needed)
8378 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8379 GEN_INT (frame.to_allocate
8380 + frame.nregs * UNITS_PER_WORD
8381 + frame.nsseregs * 16
8384 /* If not an i386, mov & pop is faster than "leave". */
8385 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8386 || !cfun->machine->use_fast_prologue_epilogue)
8387 emit_insn ((*ix86_gen_leave) ());
8390 pro_epilogue_adjust_stack (stack_pointer_rtx,
8391 hard_frame_pointer_rtx,
8394 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Pop-based restore path (registers restored with pop insns).  */
8399 /* First step is to deallocate the stack frame so that we can
8402 If we realign stack with frame pointer, then stack pointer
8403 won't be able to recover via lea $offset(%bp), %sp, because
8404 there is a padding area between bp and sp for realign.
8405 "add $to_allocate, %sp" must be used instead. */
8408 gcc_assert (frame_pointer_needed);
8409 gcc_assert (!stack_realign_fp);
8410 pro_epilogue_adjust_stack (stack_pointer_rtx,
8411 hard_frame_pointer_rtx,
8412 GEN_INT (offset), style);
8413 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8414 frame.to_allocate, style == 2);
8415 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8416 GEN_INT (frame.nsseregs * 16), style);
8418 else if (frame.to_allocate || frame.nsseregs)
8420 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8423 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8424 GEN_INT (frame.to_allocate
8425 + frame.nsseregs * 16
8426 + frame.padding0), style);
8429 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8430 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8431 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8432 if (frame_pointer_needed)
8434 /* Leave results in shorter dependency chains on CPUs that are
8435 able to grok it fast. */
8436 if (TARGET_USE_LEAVE)
8437 emit_insn ((*ix86_gen_leave) ());
8440 /* When stack realign really happens, recovering the stack
8441 pointer from the hard frame pointer is a must, if not using
8443 if (stack_realign_fp)
8444 pro_epilogue_adjust_stack (stack_pointer_rtx,
8445 hard_frame_pointer_rtx,
8447 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo DRAP: recover sp from the slot the prologue set up, and pop
   the DRAP register if it was callee-saved.  */
8452 if (crtl->drap_reg && crtl->stack_realign_needed)
8454 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8455 ? 0 : UNITS_PER_WORD);
8456 gcc_assert (stack_realign_drap);
8457 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8459 GEN_INT (-(UNITS_PER_WORD
8460 + param_ptr_offset))));
8461 if (!call_used_regs[REGNO (crtl->drap_reg)])
8462 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8466 /* Sibcall epilogues don't want a return instruction. */
8470 if (crtl->args.pops_args && crtl->args.size)
8472 rtx popc = GEN_INT (crtl->args.pops_args);
8474 /* i386 can only pop 64K bytes. If asked to pop more, pop
8475 return address, do explicit add, and jump indirectly to the
8478 if (crtl->args.pops_args >= 65536)
8480 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8482 /* There is no "pascal" calling convention in any 64bit ABI. */
8483 gcc_assert (!TARGET_64BIT);
8485 emit_insn (gen_popsi1 (ecx));
8486 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8487 emit_jump_insn (gen_return_indirect_internal (ecx));
8490 emit_jump_insn (gen_return_pop_internal (popc));
8493 emit_jump_insn (gen_return_internal ());
8496 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restores the PIC register's real
   hard register number (the prologue may have retargeted it to an
   alternate register), then works around a Mach-O limitation.  FILE
   and SIZE are unused except for the Mach-O nop emission.  */
8499 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8500 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8502 if (pic_offset_table_rtx)
8503 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8505 /* Mach-O doesn't support labels at the end of objects, so if
8506 it looks like we might want one, insert a NOP. */
8508 rtx insn = get_last_insn ();
/* Scan backward past trailing notes to find the last real insn.  */
8511 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8512 insn = PREV_INSN (insn);
8516 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8517 fputs ("\tnop\n", file);
8523 /* Extract the parts of an RTL expression that is a valid memory address
8524 for an instruction. Return 0 if the structure of the address is
8525 grossly off. Return -1 if the address contains ASHIFT, so it is not
8526 strictly valid, but still used for computing length of lea instruction. */
/* Fills *OUT with base, index, scale, displacement and segment.
   NOTE(review): this extract elides interleaved source lines (several
   case labels, braces, error returns); code lines are byte-identical.  */
8529 ix86_decompose_address (rtx addr, struct ix86_address *out)
8531 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8532 rtx base_reg, index_reg;
8533 HOST_WIDE_INT scale = 1;
8534 rtx scale_rtx = NULL_RTX;
8536 enum ix86_address_seg seg = SEG_DEFAULT;
8538 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8540 else if (GET_CODE (addr) == PLUS)
/* Flatten the nested PLUS chain into an addends array, then classify
   each addend.  */
8550 addends[n++] = XEXP (op, 1);
8553 while (GET_CODE (op) == PLUS);
8558 for (i = n; i >= 0; --i)
8561 switch (GET_CODE (op))
8566 index = XEXP (op, 0);
8567 scale_rtx = XEXP (op, 1);
/* A thread-pointer UNSPEC selects the %fs/%gs segment when direct
   TLS segment references are enabled.  */
8571 if (XINT (op, 1) == UNSPEC_TP
8572 && TARGET_TLS_DIRECT_SEG_REFS
8573 && seg == SEG_DEFAULT)
8574 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8603 else if (GET_CODE (addr) == MULT)
8605 index = XEXP (addr, 0); /* index*scale */
8606 scale_rtx = XEXP (addr, 1);
8608 else if (GET_CODE (addr) == ASHIFT)
8612 /* We're called for lea too, which implements ashift on occasion. */
8613 index = XEXP (addr, 0);
8614 tmp = XEXP (addr, 1);
8615 if (!CONST_INT_P (tmp))
/* Shift count becomes the scale: 1, 2 or 4 -> scale 2, 4 or 8.  */
8617 scale = INTVAL (tmp);
8618 if ((unsigned HOST_WIDE_INT) scale > 3)
8624 disp = addr; /* displacement */
8626 /* Extract the integral value of scale. */
8629 if (!CONST_INT_P (scale_rtx))
8631 scale = INTVAL (scale_rtx);
8634 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8635 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8637 /* Allow arg pointer and stack pointer as index if there is no scaling. */
8638 if (base_reg && index_reg && scale == 1
8639 && (index_reg == arg_pointer_rtx
8640 || index_reg == frame_pointer_rtx
8641 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Swap so the special register ends up as the base.  */
8644 tmp = base, base = index, index = tmp;
8645 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8648 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8649 if ((base_reg == hard_frame_pointer_rtx
8650 || base_reg == frame_pointer_rtx
8651 || base_reg == arg_pointer_rtx) && !disp)
8654 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8655 Avoid this by transforming to [%esi+0].
8656 Reload calls address legitimization without cfun defined, so we need
8657 to test cfun for being non-NULL. */
8658 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8659 && base_reg && !index_reg && !disp
8661 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8664 /* Special case: encode reg+reg instead of reg*2. */
8665 if (!base && index && scale && scale == 2)
8666 base = index, base_reg = index_reg, scale = 1;
8668 /* Special case: scaling cannot be encoded without base or displacement. */
8669 if (!base && !disp && index && scale != 1)
8681 /* Return cost of the memory address x.
8682 For i386, it is better to use a complex address than let gcc copy
8683 the address into a reg and make a new pseudo. But not if the address
8684 requires two regs - that would mean more pseudos with longer
/* TARGET_ADDRESS_COST hook; SPEED is unused here.  */
8687 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8689 struct ix86_address parts;
8691 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so the hard-vs-pseudo tests below see the
   underlying registers.  */
8695 if (parts.base && GET_CODE (parts.base) == SUBREG)
8696 parts.base = SUBREG_REG (parts.base);
8697 if (parts.index && GET_CODE (parts.index) == SUBREG)
8698 parts.index = SUBREG_REG (parts.index);
8700 /* Attempt to minimize number of registers in the address. */
8702 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8704 && (!REG_P (parts.index)
8705 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8709 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8711 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8712 && parts.base != parts.index)
8715 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
8716 since its predecode logic can't detect the length of instructions
8717 and it degenerates to vector decoded. Increase cost of such
8718 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8719 to split such addresses or even refuse such addresses at all.
8721 Following addressing modes are affected:
8726 The first and last case may be avoidable by explicitly coding the zero in
8727 memory address, but I don't have AMD-K6 machine handy to check this
8731 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8732 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8733 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8739 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8740 this is used to form addresses to local data when -fPIC is in
/* Returns true iff DISP is the Mach-O pic-base offset UNSPEC.  */
8744 darwin_local_data_pic (rtx disp)
8746 return (GET_CODE (disp) == UNSPEC
8747 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8750 /* Determine if a given RTX is a valid constant. We already know this
8751 satisfies CONSTANT_P. */
/* NOTE(review): several case labels and return statements of the
   switch are elided in this extract; code lines are byte-identical.  */
8754 legitimate_constant_p (rtx x)
8756 switch (GET_CODE (x))
/* CONST: strip to the inner expression and validate the pieces.  */
8761 if (GET_CODE (x) == PLUS)
8763 if (!CONST_INT_P (XEXP (x, 1)))
8768 if (TARGET_MACHO && darwin_local_data_pic (x))
8771 /* Only some unspecs are valid as "constants". */
8772 if (GET_CODE (x) == UNSPEC)
8773 switch (XINT (x, 1))
8778 return TARGET_64BIT;
/* TLS offset unspecs are valid only for the matching TLS model of
   the wrapped symbol.  */
8781 x = XVECEXP (x, 0, 0);
8782 return (GET_CODE (x) == SYMBOL_REF
8783 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8785 x = XVECEXP (x, 0, 0);
8786 return (GET_CODE (x) == SYMBOL_REF
8787 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8792 /* We must have drilled down to a symbol. */
8793 if (GET_CODE (x) == LABEL_REF)
8795 if (GET_CODE (x) != SYMBOL_REF)
8800 /* TLS symbols are never valid. */
8801 if (SYMBOL_REF_TLS_MODEL (x))
8804 /* DLLIMPORT symbols are never valid. */
8805 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8806 && SYMBOL_REF_DLLIMPORT_P (x))
8811 if (GET_MODE (x) == TImode
8812 && x != CONST0_RTX (TImode)
8818 if (x == CONST0_RTX (GET_MODE (x)))
8826 /* Otherwise we handle everything else in the move patterns. */
8830 /* Determine if it's legal to put X into the constant pool. This
8831 is not possible for the address of thread-local symbols, which
8832 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: returns nonzero when X must NOT
   be forced into the constant pool.  (The integral/vector cases that
   always return false sit on lines elided from this extract.)  */
8835 ix86_cannot_force_const_mem (rtx x)
8837 /* We can always put integral constants and vectors in memory. */
8838 switch (GET_CODE (x))
8848 return !legitimate_constant_p (x);
8851 /* Determine if a given RTX is a valid constant address. */
/* True iff X is CONSTANT_P and passes strict address legitimation in
   Pmode.  */
8854 constant_address_p (rtx x)
8856 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8859 /* Nonzero if the constant value X is a legitimate general operand
8860 when generating PIC code. It is given that flag_pic is on and
8861 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): some case labels/returns of both switches are elided
   in this extract; code lines are byte-identical.  */
8864 legitimate_pic_operand_p (rtx x)
8868 switch (GET_CODE (x))
/* CONST: peel off an optional "+ const_int" wrapper first.  */
8871 inner = XEXP (x, 0);
8872 if (GET_CODE (inner) == PLUS
8873 && CONST_INT_P (XEXP (inner, 1)))
8874 inner = XEXP (inner, 0);
8876 /* Only some unspecs are valid as "constants". */
8877 if (GET_CODE (inner) == UNSPEC)
8878 switch (XINT (inner, 1))
8883 return TARGET_64BIT;
8885 x = XVECEXP (inner, 0, 0);
8886 return (GET_CODE (x) == SYMBOL_REF
8887 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8888 case UNSPEC_MACHOPIC_OFFSET:
8889 return legitimate_pic_address_disp_p (x);
/* SYMBOL_REF/LABEL_REF fall back to the displacement check.  */
8897 return legitimate_pic_address_disp_p (x);
8904 /* Determine if a given CONST RTX is a valid memory displacement
/* under PIC.  NOTE(review): several case labels, braces and returns
   are elided in this extract; code lines are byte-identical.  */
8908 legitimate_pic_address_disp_p (rtx disp)
8912 /* In 64bit mode we can allow direct addresses of symbols and labels
8913 when they are not dynamic symbols. */
8916 rtx op0 = disp, op1;
8918 switch (GET_CODE (disp))
/* CONST: accept symbol/label plus a small (+-16MB) constant only.  */
8924 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8926 op0 = XEXP (XEXP (disp, 0), 0);
8927 op1 = XEXP (XEXP (disp, 0), 1);
8928 if (!CONST_INT_P (op1)
8929 || INTVAL (op1) >= 16*1024*1024
8930 || INTVAL (op1) < -16*1024*1024)
8932 if (GET_CODE (op0) == LABEL_REF)
8934 if (GET_CODE (op0) != SYMBOL_REF)
8939 /* TLS references should always be enclosed in UNSPEC. */
8940 if (SYMBOL_REF_TLS_MODEL (op0))
/* Local, near symbols are directly addressable outside the large
   PIC model.  */
8942 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8943 && ix86_cmodel != CM_LARGE_PIC)
8951 if (GET_CODE (disp) != CONST)
8953 disp = XEXP (disp, 0);
/* 64-bit: only a bare GOT-style UNSPEC wrapping a symbol/label is
   allowed as a displacement.  */
8957 /* It is unsafe to allow PLUS expressions. This limited the allowed
8958 distance of GOT tables. We should not need these anyway. */
8959 if (GET_CODE (disp) != UNSPEC
8960 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8961 && XINT (disp, 1) != UNSPEC_GOTOFF
8962 && XINT (disp, 1) != UNSPEC_PLTOFF))
8965 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8966 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: peel an optional "+ const_int".  */
8972 if (GET_CODE (disp) == PLUS)
8974 if (!CONST_INT_P (XEXP (disp, 1)))
8976 disp = XEXP (disp, 0);
8980 if (TARGET_MACHO && darwin_local_data_pic (disp))
8983 if (GET_CODE (disp) != UNSPEC)
8986 switch (XINT (disp, 1))
8991 /* We need to check for both symbols and labels because VxWorks loads
8992 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8994 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8995 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8997 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8998 While the ABI also specifies 32bit relocation, we don't produce it in
8999 small PIC model at all. */
9000 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9001 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9003 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS UNSPECs: the wrapped symbol must carry the matching model.  */
9005 case UNSPEC_GOTTPOFF:
9006 case UNSPEC_GOTNTPOFF:
9007 case UNSPEC_INDNTPOFF:
9010 disp = XVECEXP (disp, 0, 0);
9011 return (GET_CODE (disp) == SYMBOL_REF
9012 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9014 disp = XVECEXP (disp, 0, 0);
9015 return (GET_CODE (disp) == SYMBOL_REF
9016 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9018 disp = XVECEXP (disp, 0, 0);
9019 return (GET_CODE (disp) == SYMBOL_REF
9020 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9026 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9027 memory address for an instruction. The MODE argument is the machine mode
9028 for the MEM expression that wants to use this address.
9030 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9031 convert common non-canonical forms to canonical form so that they will
/* STRICT selects hard-register-only checking (post-reload).  Each
   failing check sets REASON (and often REASON_RTX) before jumping to
   the rejection label, which sits on an elided line.
   NOTE(review): this extract elides many interleaved lines (braces,
   labels, returns); code lines are byte-identical.  */
9035 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9036 rtx addr, int strict)
9038 struct ix86_address parts;
9039 rtx base, index, disp;
9040 HOST_WIDE_INT scale;
9041 const char *reason = NULL;
9042 rtx reason_rtx = NULL_RTX;
9044 if (ix86_decompose_address (addr, &parts) <= 0)
9046 reason = "decomposition failed";
9051 index = parts.index;
9053 scale = parts.scale;
9055 /* Validate base register.
9057 Don't allow SUBREG's that span more than a word here. It can lead to spill
9058 failures when the base is one word out of a two word structure, which is
9059 represented internally as a DImode int. */
9068 else if (GET_CODE (base) == SUBREG
9069 && REG_P (SUBREG_REG (base))
9070 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9072 reg = SUBREG_REG (base);
9075 reason = "base is not a register";
9079 if (GET_MODE (base) != Pmode)
9081 reason = "base is not in Pmode";
9085 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9086 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9088 reason = "base is not valid";
9093 /* Validate index register.
9095 Don't allow SUBREG's that span more than a word here -- same as above. */
9104 else if (GET_CODE (index) == SUBREG
9105 && REG_P (SUBREG_REG (index))
9106 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9108 reg = SUBREG_REG (index);
9111 reason = "index is not a register";
9115 if (GET_MODE (index) != Pmode)
9117 reason = "index is not in Pmode";
9121 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9122 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9124 reason = "index is not valid";
9129 /* Validate scale factor. */
9132 reason_rtx = GEN_INT (scale);
9135 reason = "scale without index";
/* Hardware only encodes scale factors 1, 2, 4 and 8.  */
9139 if (scale != 2 && scale != 4 && scale != 8)
9141 reason = "scale is not a valid multiplier";
9146 /* Validate displacement. */
9151 if (GET_CODE (disp) == CONST
9152 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9153 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9154 switch (XINT (XEXP (disp, 0), 1))
9156 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9157 used. While the ABI also specifies 32bit relocations, we don't produce
9158 them at all and use IP relative instead. */
9161 gcc_assert (flag_pic);
9163 goto is_legitimate_pic;
9164 reason = "64bit address unspec";
9167 case UNSPEC_GOTPCREL:
9168 gcc_assert (flag_pic);
9169 goto is_legitimate_pic;
9171 case UNSPEC_GOTTPOFF:
9172 case UNSPEC_GOTNTPOFF:
9173 case UNSPEC_INDNTPOFF:
9179 reason = "invalid address unspec";
9183 else if (SYMBOLIC_CONST (disp)
9187 && MACHOPIC_INDIRECT
9188 && !machopic_operand_p (disp)
/* PIC displacement validation (is_legitimate_pic label elided).  */
9194 if (TARGET_64BIT && (index || base))
9196 /* foo@dtpoff(%rX) is ok. */
9197 if (GET_CODE (disp) != CONST
9198 || GET_CODE (XEXP (disp, 0)) != PLUS
9199 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9200 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9201 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9202 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9204 reason = "non-constant pic memory reference";
9208 else if (! legitimate_pic_address_disp_p (disp))
9210 reason = "displacement is an invalid pic construct";
9214 /* This code used to verify that a symbolic pic displacement
9215 includes the pic_offset_table_rtx register.
9217 While this is good idea, unfortunately these constructs may
9218 be created by "adds using lea" optimization for incorrect
9227 This code is nonsensical, but results in addressing
9228 GOT table with pic_offset_table_rtx base. We can't
9229 just refuse it easily, since it gets matched by
9230 "addsi3" pattern, that later gets split to lea in the
9231 case output register differs from input. While this
9232 can be handled by separate addsi pattern for this case
9233 that never results in lea, this seems to be easier and
9234 correct fix for crash to disable this test. */
9236 else if (GET_CODE (disp) != LABEL_REF
9237 && !CONST_INT_P (disp)
9238 && (GET_CODE (disp) != CONST
9239 || !legitimate_constant_p (disp))
9240 && (GET_CODE (disp) != SYMBOL_REF
9241 || !legitimate_constant_p (disp)))
9243 reason = "displacement is not constant";
9246 else if (TARGET_64BIT
9247 && !x86_64_immediate_operand (disp, VOIDmode))
9249 reason = "displacement is out of range";
9254 /* Everything looks valid. */
9261 /* Return a unique alias set for the GOT. */
/* Lazily creates the alias set on first call and caches it in a
   function-local static (the guard and return sit on elided lines).  */
9263 static alias_set_type
9264 ix86_GOT_alias_set (void)
9266 static alias_set_type set = -1;
9268 set = new_alias_set ();
9272 /* Return a legitimate reference for ORIG (an address) using the
9273 register REG. If REG is 0, a new pseudo is generated.
9275 There are two types of references that must be handled:
9277 1. Global data references must load the address from the GOT, via
9278 the PIC reg. An insn is emitted to do this load, and the reg is
9281 2. Static data references, constant pool addresses, and code labels
9282 compute the address as an offset from the GOT, whose base is in
9283 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9284 differentiate them from global data objects. The returned
9285 address is the PIC reg + an unspec constant.
9287 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9288 reg also appears in the address. */
9291 legitimize_pic_address (rtx orig, rtx reg)
/* Darwin 32-bit: delegate entirely to the generic Mach-O PIC support. */
9298 if (TARGET_MACHO && !TARGET_64BIT)
9301 reg = gen_reg_rtx (Pmode);
9302 /* Use the generic Mach-O PIC machinery. */
9303 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9307 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
/* 64-bit, non-small code model: local symbols can still be reached as
   PIC-register-relative @GOTOFF displacements, but the displacement may
   not fit an immediate, so it is materialized in a scratch register. */
9309 else if (TARGET_64BIT
9310 && ix86_cmodel != CM_SMALL_PIC
9311 && gotoff_operand (addr, Pmode))
9314 /* This symbol may be referenced via a displacement from the PIC
9315 base address (@GOTOFF). */
9317 if (reload_in_progress)
9318 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9319 if (GET_CODE (addr) == CONST)
9320 addr = XEXP (addr, 0);
9321 if (GET_CODE (addr) == PLUS)
9323 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9325 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9328 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9329 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9331 tmpreg = gen_reg_rtx (Pmode);
9334 emit_move_insn (tmpreg, new_rtx);
/* Add the PIC base to the @GOTOFF constant now held in tmpreg. */
9338 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9339 tmpreg, 1, OPTAB_DIRECT);
9342 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit local symbols: same @GOTOFF scheme, but the displacement always
   fits, so a plain PIC-reg + const PLUS suffices. */
9344 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9346 /* This symbol may be referenced via a displacement from the PIC
9347 base address (@GOTOFF). */
9349 if (reload_in_progress)
9350 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9351 if (GET_CODE (addr) == CONST)
9352 addr = XEXP (addr, 0);
9353 if (GET_CODE (addr) == PLUS)
9355 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9357 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9360 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9361 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9362 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9366 emit_move_insn (reg, new_rtx);
/* Global (non-TLS) symbols go through the GOT proper. */
9370 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9371 /* We can't use @GOTOFF for text labels on VxWorks;
9372 see gotoff_operand. */
9373 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
/* dllimport symbols are redirected to their __imp_ indirection cell
   before any GOT handling. */
9375 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9377 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9378 return legitimize_dllimport_symbol (addr, true);
9379 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9380 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9381 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9383 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9384 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium models: RIP-relative GOT load (@GOTPCREL). */
9388 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9390 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9391 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9392 new_rtx = gen_const_mem (Pmode, new_rtx);
9393 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9396 reg = gen_reg_rtx (Pmode);
9397 /* Use directly gen_movsi, otherwise the address is loaded
9398 into register for CSE. We don't want to CSE this addresses,
9399 instead we CSE addresses from the GOT table, so skip this. */
9400 emit_insn (gen_movsi (reg, new_rtx));
9405 /* This symbol must be referenced via a load from the
9406 Global Offset Table (@GOT). */
9408 if (reload_in_progress)
9409 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9410 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9411 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9413 new_rtx = force_reg (Pmode, new_rtx);
9414 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9415 new_rtx = gen_const_mem (Pmode, new_rtx);
9416 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9419 reg = gen_reg_rtx (Pmode);
9420 emit_move_insn (reg, new_rtx);
/* Remaining cases: plain constants and composite CONST expressions. */
9426 if (CONST_INT_P (addr)
9427 && !x86_64_immediate_operand (addr, VOIDmode))
9431 emit_move_insn (reg, addr);
9435 new_rtx = force_reg (Pmode, addr);
9437 else if (GET_CODE (addr) == CONST)
9439 addr = XEXP (addr, 0);
9441 /* We must match stuff we generate before. Assume the only
9442 unspecs that can get here are ours. Not that we could do
9443 anything with them anyway.... */
9444 if (GET_CODE (addr) == UNSPEC
9445 || (GET_CODE (addr) == PLUS
9446 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9448 gcc_assert (GET_CODE (addr) == PLUS);
9450 if (GET_CODE (addr) == PLUS)
9452 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9454 /* Check first to see if this is a constant offset from a @GOTOFF
9455 symbol reference. */
9456 if (gotoff_operand (op0, Pmode)
9457 && CONST_INT_P (op1))
9461 if (reload_in_progress)
9462 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9463 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9465 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9466 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9467 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9471 emit_move_insn (reg, new_rtx);
/* +/-16MB window: offsets outside this range cannot stay folded into
   the @GOTOFF constant and must be added separately. */
9477 if (INTVAL (op1) < -16*1024*1024
9478 || INTVAL (op1) >= 16*1024*1024)
9480 if (!x86_64_immediate_operand (op1, Pmode))
9481 op1 = force_reg (Pmode, op1);
9482 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize each side recursively, then refold any
   constant part so it stays outermost. */
9488 base = legitimize_pic_address (XEXP (addr, 0), reg);
9489 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9490 base == reg ? NULL_RTX : reg);
9492 if (CONST_INT_P (new_rtx))
9493 new_rtx = plus_constant (base, INTVAL (new_rtx));
9496 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9498 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9499 new_rtx = XEXP (new_rtx, 1);
9501 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9509 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9512 get_thread_pointer (int to_reg)
/* The thread pointer is represented as an UNSPEC_TP wrapper; when TO_REG
   is set it is copied into a fresh pseudo so it can be used as a base. */
9516 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9520 reg = gen_reg_rtx (Pmode);
9521 insn = gen_rtx_SET (VOIDmode, reg, tp);
9522 insn = emit_insn (insn);
9527 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9528 false if we expect this to be used for a memory address and true if
9529 we expect to load the address into a register. */
9532 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9534 rtx dest, base, off, pic, tp;
/* General dynamic: call __tls_get_addr (via a tls_global_dynamic
   pattern) to resolve the address at run time. */
9539 case TLS_MODEL_GLOBAL_DYNAMIC:
9540 dest = gen_reg_rtx (Pmode);
9541 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9543 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
/* 64-bit ABI returns the address in %rax; wrap the call in a libcall
   block equivalent to X so it can be CSEd. */
9545 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9548 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9549 insns = get_insns ();
9552 RTL_CONST_CALL_P (insns) = 1;
9553 emit_libcall_block (insns, dest, rax, x);
9555 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9556 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9558 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 (TLS descriptor) scheme yields an offset; add the thread
   pointer and record the equivalence for later passes. */
9560 if (TARGET_GNU2_TLS)
9562 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9564 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Local dynamic: resolve the module base once, then add the
   @DTPOFF offset of the particular symbol. */
9568 case TLS_MODEL_LOCAL_DYNAMIC:
9569 base = gen_reg_rtx (Pmode);
9570 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9572 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9574 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9577 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9578 insns = get_insns ();
9581 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9582 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9583 RTL_CONST_CALL_P (insns) = 1;
9584 emit_libcall_block (insns, base, rax, note);
9586 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9587 emit_insn (gen_tls_local_dynamic_base_64 (base));
9589 emit_insn (gen_tls_local_dynamic_base_32 (base));
9591 if (TARGET_GNU2_TLS)
/* Note: this X shadows the parameter; it is the module base used
   only for the REG_EQUIV note below. */
9593 rtx x = ix86_tls_module_base ();
9595 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9596 gen_rtx_MINUS (Pmode, x, tp));
9599 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9600 off = gen_rtx_CONST (Pmode, off);
9602 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9604 if (TARGET_GNU2_TLS)
9606 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9608 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Initial exec: load the TP-relative offset from the GOT; the
   UNSPEC chosen selects the relocation (@GOTNTPOFF / @GOTTPOFF /
   @INDNTPOFF) appropriate for the addressing situation. */
9613 case TLS_MODEL_INITIAL_EXEC:
9617 type = UNSPEC_GOTNTPOFF;
9621 if (reload_in_progress)
9622 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9623 pic = pic_offset_table_rtx;
9624 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9626 else if (!TARGET_ANY_GNU_TLS)
9628 pic = gen_reg_rtx (Pmode);
9629 emit_insn (gen_set_got (pic));
9630 type = UNSPEC_GOTTPOFF;
9635 type = UNSPEC_INDNTPOFF;
9638 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9639 off = gen_rtx_CONST (Pmode, off);
9641 off = gen_rtx_PLUS (Pmode, pic, off);
9642 off = gen_const_mem (Pmode, off);
9643 set_mem_alias_set (off, ix86_GOT_alias_set ());
9645 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
/* GNU TLS: address is TP + offset (can stay a seg-ref address). */
9647 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9648 off = force_reg (Pmode, off);
9649 return gen_rtx_PLUS (Pmode, base, off);
/* Sun-style TLS: address is TP - offset, computed with subsi3. */
9653 base = get_thread_pointer (true);
9654 dest = gen_reg_rtx (Pmode);
9655 emit_insn (gen_subsi3 (dest, base, off));
/* Local exec: offset is a link-time constant (@NTPOFF or @TPOFF). */
9659 case TLS_MODEL_LOCAL_EXEC:
9660 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9661 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9662 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9663 off = gen_rtx_CONST (Pmode, off);
9665 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9667 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9668 return gen_rtx_PLUS (Pmode, base, off);
9672 base = get_thread_pointer (true);
9673 dest = gen_reg_rtx (Pmode);
9674 emit_insn (gen_subsi3 (dest, base, off));
9685 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* GC-protected hash table mapping decls to their cached __imp_ VAR_DECLs;
   created lazily on first use. */
9688 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9689 htab_t dllimport_map;
9692 get_dllimport_decl (tree decl)
9694 struct tree_map *h, in;
9698 size_t namelen, prefixlen;
9704 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
/* Look up DECL; if already mapped, the cached entry is reused. */
9706 in.hash = htab_hash_pointer (decl);
9707 in.base.from = decl;
9708 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9709 h = (struct tree_map *) *loc;
/* Cache miss: build an artificial external read-only VAR_DECL that
   stands for the import-table pointer slot. */
9713 *loc = h = GGC_NEW (struct tree_map);
9715 h->base.from = decl;
9716 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9717 DECL_ARTIFICIAL (to) = 1;
9718 DECL_IGNORED_P (to) = 1;
9719 DECL_EXTERNAL (to) = 1;
9720 TREE_READONLY (to) = 1;
/* Compose the "*__imp_" / "*__imp__" prefixed assembler name.  The
   extra underscore variant is used when a user-label prefix applies
   and the symbol is not fastcall-encoded. */
9722 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9723 name = targetm.strip_name_encoding (name);
9724 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9725 ? "*__imp_" : "*__imp__";
9726 namelen = strlen (name);
9727 prefixlen = strlen (prefix);
9728 imp_name = (char *) alloca (namelen + prefixlen + 1);
9729 memcpy (imp_name, prefix, prefixlen);
9730 memcpy (imp_name + prefixlen, name, namelen + 1);
/* The decl's RTL is a const MEM loading through the __imp_ symbol,
   sharing the GOT alias set. */
9732 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9733 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9734 SET_SYMBOL_REF_DECL (rtl, to);
9735 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9737 rtl = gen_const_mem (Pmode, rtl);
9738 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9740 SET_DECL_RTL (to, rtl);
9741 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9746 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9747 true if we require the result be a register. */
9750 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* The symbol must carry its decl so the __imp_ mapping can be found. */
9755 gcc_assert (SYMBOL_REF_DECL (symbol));
9756 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
/* DECL_RTL of the import decl is the MEM through the __imp_ slot;
   optionally force it into a register for the caller. */
9758 x = DECL_RTL (imp_decl);
9760 x = force_reg (Pmode, x);
9764 /* Try machine-dependent ways of modifying an illegitimate address
9765 to be legitimate. If we find one, return the new, valid address.
9766 This macro is used in only one place: `memory_address' in explow.c.
9768 OLDX is the address as it was before break_out_memory_refs was called.
9769 In some cases it is useful to look at this to decide what needs to be done.
9771 MODE and WIN are passed so that this macro can use
9772 GO_IF_LEGITIMATE_ADDRESS.
9774 It is always safe for this macro to do nothing. It exists to recognize
9775 opportunities to optimize the output.
9777 For the 80386, we handle X+REG by loading X into a register R and
9778 using R+REG. R will go in a general reg and indexing will be used.
9779 However, if REG is a broken-out memory address or multiplication,
9780 nothing needs to be done because REG can certainly go in a general reg.
9782 When -fpic is used, special handling is needed for symbolic references.
9783 See comments by legitimize_pic_address in i386.c for details. */
9786 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols (bare or inside CONST+PLUS) are handled first, routed
   through legitimize_tls_address with their recorded TLS model. */
9791 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9793 return legitimize_tls_address (x, (enum tls_model) log, false);
9794 if (GET_CODE (x) == CONST
9795 && GET_CODE (XEXP (x, 0)) == PLUS
9796 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9797 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9799 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9800 (enum tls_model) log, false);
9801 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* dllimport symbols are replaced with their __imp_ indirection. */
9804 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9806 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9807 return legitimize_dllimport_symbol (x, true);
9808 if (GET_CODE (x) == CONST
9809 && GET_CODE (XEXP (x, 0)) == PLUS
9810 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9811 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9813 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9814 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PIC: symbolic constants need the full PIC legitimization. */
9818 if (flag_pic && SYMBOLIC_CONST (x))
9819 return legitimize_pic_address (x, 0);
9821 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9822 if (GET_CODE (x) == ASHIFT
9823 && CONST_INT_P (XEXP (x, 1))
9824 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9827 log = INTVAL (XEXP (x, 1));
9828 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9829 GEN_INT (1 << log));
9832 if (GET_CODE (x) == PLUS)
9834 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9836 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9837 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9838 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9841 log = INTVAL (XEXP (XEXP (x, 0), 1));
9842 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9843 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9844 GEN_INT (1 << log));
9847 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9848 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9849 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9852 log = INTVAL (XEXP (XEXP (x, 1), 1));
9853 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9854 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9855 GEN_INT (1 << log));
9858 /* Put multiply first if it isn't already. */
9859 if (GET_CODE (XEXP (x, 1)) == MULT)
9861 rtx tmp = XEXP (x, 0);
9862 XEXP (x, 0) = XEXP (x, 1);
9867 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9868 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9869 created by virtual register instantiation, register elimination, and
9870 similar optimizations. */
9871 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9874 x = gen_rtx_PLUS (Pmode,
9875 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9876 XEXP (XEXP (x, 1), 0)),
9877 XEXP (XEXP (x, 1), 1));
9881 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9882 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9883 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9884 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9885 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9886 && CONSTANT_P (XEXP (x, 1)))
9889 rtx other = NULL_RTX;
/* Pick out which side carries the CONST_INT, keeping the other term. */
9891 if (CONST_INT_P (XEXP (x, 1)))
9893 constant = XEXP (x, 1);
9894 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9896 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9898 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9899 other = XEXP (x, 1);
9907 x = gen_rtx_PLUS (Pmode,
9908 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9909 XEXP (XEXP (XEXP (x, 0), 1), 0)),
9910 plus_constant (other, INTVAL (constant)));
/* After each canonicalization round, return early if the address is
   now legitimate. */
9914 if (changed && legitimate_address_p (mode, x, FALSE))
9917 if (GET_CODE (XEXP (x, 0)) == MULT)
9920 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9923 if (GET_CODE (XEXP (x, 1)) == MULT)
9926 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9930 && REG_P (XEXP (x, 1))
9931 && REG_P (XEXP (x, 0)))
9934 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9937 x = legitimize_pic_address (x, 0);
9940 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register side into a fresh pseudo and
   build reg+reg. */
9943 if (REG_P (XEXP (x, 0)))
9945 rtx temp = gen_reg_rtx (Pmode);
9946 rtx val = force_operand (XEXP (x, 1), temp);
9948 emit_move_insn (temp, val);
9954 else if (REG_P (XEXP (x, 1)))
9956 rtx temp = gen_reg_rtx (Pmode);
9957 rtx val = force_operand (XEXP (x, 0), temp);
9959 emit_move_insn (temp, val);
9969 /* Print an integer constant expression in assembler syntax. Addition
9970 and subtraction are the only arithmetic that may appear in these
9971 expressions. FILE is the stdio stream to write to, X is the rtx, and
9972 CODE is the operand print code from the output string. */
9975 output_pic_addr_const (FILE *file, rtx x, int code)
9979 switch (GET_CODE (x))
9982 gcc_assert (flag_pic);
/* SYMBOL_REF: emit the (possibly Mach-O stub-indirected) name, plus
   an @PLT suffix for non-local calls under the 'P' print code. */
9987 if (! TARGET_MACHO || TARGET_64BIT)
9988 output_addr_const (file, x);
9991 const char *name = XSTR (x, 0);
9993 /* Mark the decl as referenced so that cgraph will
9994 output the function. */
9995 if (SYMBOL_REF_DECL (x))
9996 mark_decl_referenced (SYMBOL_REF_DECL (x));
9999 if (MACHOPIC_INDIRECT
10000 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10001 name = machopic_indirection_name (x, /*stub_p=*/true);
10003 assemble_name (file, name);
10005 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10006 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10007 fputs ("@PLT", file);
/* Code labels print as internal "L" labels. */
10014 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10015 assemble_name (asm_out_file, buf);
10019 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10023 /* This used to output parentheses around the expression,
10024 but that does not work on the 386 (either ATT or BSD assembler). */
10025 output_pic_addr_const (file, XEXP (x, 0), code);
10029 if (GET_MODE (x) == VOIDmode)
10031 /* We can use %d if the number is <32 bits and positive. */
10032 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10033 fprintf (file, "0x%lx%08lx",
10034 (unsigned long) CONST_DOUBLE_HIGH (x),
10035 (unsigned long) CONST_DOUBLE_LOW (x));
10037 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10040 /* We can't handle floating point constants;
10041 PRINT_OPERAND must handle them. */
10042 output_operand_lossage ("floating constant misused");
10046 /* Some assemblers need integer constants to appear first. */
10047 if (CONST_INT_P (XEXP (x, 0)))
10049 output_pic_addr_const (file, XEXP (x, 0), code);
10051 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: second operand must be a CONST_INT; the two operands are
   printed around the subtraction operator. */
10055 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10056 output_pic_addr_const (file, XEXP (x, 1), code);
10058 output_pic_addr_const (file, XEXP (x, 0), code);
/* Grouping brackets differ between Intel and AT&T dialects. */
10064 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10065 output_pic_addr_const (file, XEXP (x, 0), code);
10067 output_pic_addr_const (file, XEXP (x, 1), code);
10069 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by the relocation
   suffix selected by the unspec number. */
10073 gcc_assert (XVECLEN (x, 0) == 1);
10074 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10075 switch (XINT (x, 1))
10078 fputs ("@GOT", file);
10080 case UNSPEC_GOTOFF:
10081 fputs ("@GOTOFF", file);
10083 case UNSPEC_PLTOFF:
10084 fputs ("@PLTOFF", file);
10086 case UNSPEC_GOTPCREL:
10087 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10088 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10090 case UNSPEC_GOTTPOFF:
10091 /* FIXME: This might be @TPOFF in Sun ld too. */
10092 fputs ("@GOTTPOFF", file);
10095 fputs ("@TPOFF", file);
10097 case UNSPEC_NTPOFF:
10099 fputs ("@TPOFF", file);
10101 fputs ("@NTPOFF", file);
10103 case UNSPEC_DTPOFF:
10104 fputs ("@DTPOFF", file);
10106 case UNSPEC_GOTNTPOFF:
10108 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10109 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10111 fputs ("@GOTNTPOFF", file);
10113 case UNSPEC_INDNTPOFF:
10114 fputs ("@INDNTPOFF", file);
10117 case UNSPEC_MACHOPIC_OFFSET:
10119 machopic_output_function_base_name (file);
10123 output_operand_lossage ("invalid UNSPEC as operand");
10129 output_operand_lossage ("invalid expression as operand");
10133 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10134 We need to emit DTP-relative relocations. */
10136 static void ATTRIBUTE_UNUSED
10137 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit the address as ".long <sym>@DTPOFF"; the trailing ", 0" pads
   the value for the wider (8-byte) case.  Unsupported sizes abort. */
10139 fputs (ASM_LONG, file);
10140 output_addr_const (file, x);
10141 fputs ("@DTPOFF", file);
10147 fputs (", 0", file);
10150 gcc_unreachable ();
10154 /* Return true if X is a representation of the PIC register. This copes
10155 with calls from ix86_find_base_term, where the register might have
10156 been replaced by a cselib value. */
10159 ix86_pic_register_p (rtx x)
/* A cselib VALUE is compared against the PIC register via cselib
   equality; an ordinary REG is matched by hard register number. */
10161 if (GET_CODE (x) == VALUE)
10162 return (pic_offset_table_rtx
10163 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10165 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10168 /* In the name of slightly smaller debug output, and to cater to
10169 general assembler lossage, recognize PIC+GOTOFF and turn it back
10170 into a direct symbol reference.
10172 On Darwin, this is necessary to avoid a crash, because Darwin
10173 has a different PIC label for each routine but the DWARF debugging
10174 information is not associated with any particular routine, so it's
10175 necessary to remove references to the PIC label from RTL stored by
10176 the DWARF output code. */
10179 ix86_delegitimize_address (rtx orig_x)
10182 /* reg_addend is NULL or a multiple of some register. */
10183 rtx reg_addend = NULL_RTX;
10184 /* const_addend is NULL or a const_int. */
10185 rtx const_addend = NULL_RTX;
10186 /* This is the result, or NULL. */
10187 rtx result = NULL_RTX;
/* 64-bit: a MEM of a @GOTPCREL const unwraps directly to the symbol. */
10194 if (GET_CODE (x) != CONST
10195 || GET_CODE (XEXP (x, 0)) != UNSPEC
10196 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10197 || !MEM_P (orig_x))
10199 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit: expect PIC-reg [+ reg*scale] + CONST(...). */
10202 if (GET_CODE (x) != PLUS
10203 || GET_CODE (XEXP (x, 1)) != CONST)
10206 if (ix86_pic_register_p (XEXP (x, 0)))
10207 /* %ebx + GOT/GOTOFF */
10209 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10211 /* %ebx + %reg * scale + GOT/GOTOFF */
10212 reg_addend = XEXP (x, 0)
10213 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10214 reg_addend = XEXP (reg_addend, 1);
10215 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10216 reg_addend = XEXP (reg_addend, 0);
10219 if (!REG_P (reg_addend)
10220 && GET_CODE (reg_addend) != MULT
10221 && GET_CODE (reg_addend) != ASHIFT)
/* Strip an outer constant offset, remembering it for reconstruction. */
10227 x = XEXP (XEXP (x, 1), 0);
10228 if (GET_CODE (x) == PLUS
10229 && CONST_INT_P (XEXP (x, 1)))
10231 const_addend = XEXP (x, 1);
/* @GOT references are only meaningful through a MEM; @GOTOFF only
   outside one. */
10235 if (GET_CODE (x) == UNSPEC
10236 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10237 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10238 result = XVECEXP (x, 0, 0);
10240 if (TARGET_MACHO && darwin_local_data_pic (x)
10241 && !MEM_P (orig_x))
10242 result = XVECEXP (x, 0, 0);
/* Re-attach the stripped constant and register addends, if any. */
10248 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10250 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10254 /* If X is a machine specific address (i.e. a symbol or label being
10255 referenced as a displacement from the GOT implemented using an
10256 UNSPEC), then return the base term. Otherwise return X. */
10259 ix86_find_base_term (rtx x)
/* Fast path: CONST (UNSPEC_GOTPCREL [sym]) possibly plus a constant
   offset unwraps to the symbol itself. */
10265 if (GET_CODE (x) != CONST)
10267 term = XEXP (x, 0);
10268 if (GET_CODE (term) == PLUS
10269 && (CONST_INT_P (XEXP (term, 1))
10270 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10271 term = XEXP (term, 0);
10272 if (GET_CODE (term) != UNSPEC
10273 || XINT (term, 1) != UNSPEC_GOTPCREL)
10276 return XVECEXP (term, 0, 0);
/* Otherwise fall back to the full delegitimizer. */
10279 return ix86_delegitimize_address (x);
/* Write the condition-code suffix (e.g. "e", "a", "np") for CODE under
   CC mode MODE to FILE.  REVERSE inverts the condition; FP selects the
   fcmov-style spellings. */
10283 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10284 int fp, FILE *file)
10286 const char *suffix;
/* FP compares are first mapped onto the equivalent integer condition. */
10288 if (mode == CCFPmode || mode == CCFPUmode)
10290 enum rtx_code second_code, bypass_code;
10291 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10292 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10293 code = ix86_fp_compare_code_to_integer (code);
10297 code = reverse_condition (code);
10348 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10352 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10353 Those same assemblers have the same but opposite lossage on cmov. */
10354 if (mode == CCmode)
10355 suffix = fp ? "nbe" : "a";
10356 else if (mode == CCCmode)
10359 gcc_unreachable ();
10375 gcc_unreachable ();
10379 gcc_assert (mode == CCmode || mode == CCCmode);
10396 gcc_unreachable ();
10400 /* ??? As above. */
10401 gcc_assert (mode == CCmode || mode == CCCmode);
10402 suffix = fp ? "nb" : "ae";
10405 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10409 /* ??? As above. */
10410 if (mode == CCmode)
10412 else if (mode == CCCmode)
10413 suffix = fp ? "nb" : "ae";
10415 gcc_unreachable ();
/* Parity-based conditions for unordered FP results. */
10418 suffix = fp ? "u" : "p";
10421 suffix = fp ? "nu" : "np";
10424 gcc_unreachable ();
10426 fputs (suffix, file);
10429 /* Print the name of register X to FILE based on its machine mode and number.
10430 If CODE is 'w', pretend the mode is HImode.
10431 If CODE is 'b', pretend the mode is QImode.
10432 If CODE is 'k', pretend the mode is SImode.
10433 If CODE is 'q', pretend the mode is DImode.
10434 If CODE is 'x', pretend the mode is V4SFmode.
10435 If CODE is 't', pretend the mode is V8SFmode.
10436 If CODE is 'h', pretend the reg is the 'high' byte register.
10437 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10438 If CODE is 'd', duplicate the operand for AVX instruction.
10442 print_reg (rtx x, int code, FILE *file)
10445 bool duplicated = code == 'd' && TARGET_AVX;
/* Internal-only registers must never reach assembly output. */
10447 gcc_assert (x == pc_rtx
10448 || (REGNO (x) != ARG_POINTER_REGNUM
10449 && REGNO (x) != FRAME_POINTER_REGNUM
10450 && REGNO (x) != FLAGS_REG
10451 && REGNO (x) != FPSR_REG
10452 && REGNO (x) != FPCR_REG));
10454 if (ASSEMBLER_DIALECT == ASM_ATT)
10459 gcc_assert (TARGET_64BIT);
10460 fputs ("rip", file);
/* Map the print code to an operand size (reusing CODE as a byte
   count from here on). */
10464 if (code == 'w' || MMX_REG_P (x))
10466 else if (code == 'b')
10468 else if (code == 'k')
10470 else if (code == 'q')
10472 else if (code == 'y')
10474 else if (code == 'h')
10476 else if (code == 'x')
10478 else if (code == 't')
10481 code = GET_MODE_SIZE (GET_MODE (x));
10483 /* Irritatingly, AMD extended registers use different naming convention
10484 from the normal registers. */
10485 if (REX_INT_REG_P (x))
10487 gcc_assert (TARGET_64BIT);
10491 error ("extended registers have no high halves");
10494 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10497 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10500 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10503 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10506 error ("unsupported operand size for extended register");
10516 if (STACK_TOP_P (x))
/* 4/8-byte integer regs get the "e"/"r" prefix before the 16-bit name. */
10525 if (! ANY_FP_REG_P (x))
10526 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10531 reg = hi_reg_name[REGNO (x)];
10534 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10536 reg = qi_reg_name[REGNO (x)];
10539 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10541 reg = qi_high_reg_name[REGNO (x)];
10546 gcc_assert (!duplicated);
10548 fputs (hi_reg_name[REGNO (x)] + 1, file);
10553 gcc_unreachable ();
/* AVX 'd': emit the register a second time, comma-separated. */
10559 if (ASSEMBLER_DIALECT == ASM_ATT)
10560 fprintf (file, ", %%%s", reg);
10562 fprintf (file, ", %s", reg);
10566 /* Locate some local-dynamic symbol still in use by this function
10567 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: records the first local-dynamic TLS
   SYMBOL_REF found in cfun->machine->some_ld_name. */
10571 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10575 if (GET_CODE (x) == SYMBOL_REF
10576 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10578 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the cached local-dynamic symbol name for the current function,
   scanning its insns on first use.  Aborts if none exists — callers
   must only ask when a local-dynamic reference is known to be present. */
10585 static const char *
10586 get_some_local_dynamic_name (void)
10590 if (cfun->machine->some_ld_name)
10591 return cfun->machine->some_ld_name;
10593 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10595 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10596 return cfun->machine->some_ld_name;
10598 gcc_unreachable ();
10601 /* Meaning of CODE:
10602 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10603 C -- print opcode suffix for set/cmov insn.
10604 c -- like C, but print reversed condition
10605 E,e -- likewise, but for compare-and-branch fused insn.
10606 F,f -- likewise, but for floating-point.
10607 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10609 R -- print the prefix for register names.
10610 z -- print the opcode suffix for the size of the current operand.
10611 * -- print a star (in certain assembler syntax)
10612 A -- print an absolute memory reference.
10613 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10614 s -- print a shift double count, followed by the assemblers argument
10616 b -- print the QImode name of the register for the indicated operand.
10617 %b0 would print %al if operands[0] is reg 0.
10618 w -- likewise, print the HImode name of the register.
10619 k -- likewise, print the SImode name of the register.
10620 q -- likewise, print the DImode name of the register.
10621 x -- likewise, print the V4SFmode name of the register.
10622 t -- likewise, print the V8SFmode name of the register.
10623 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10624 y -- print "st(0)" instead of "st" as a register.
10625 d -- print duplicated register operand for AVX instruction.
10626 D -- print condition for SSE cmp instruction.
10627 P -- if PIC, print an @PLT suffix.
10628 X -- don't print any sort of PIC '@' suffix for a symbol.
10629 & -- print some in-use local-dynamic symbol name.
10630 H -- print a memory address offset by 8; used for sse high-parts
10631 Y -- print condition for SSE5 com* instruction.
10632 + -- print a branch hint as 'cs' or 'ds' prefix
10633 ; -- print a semicolon (after prefixes due to bug in older gas).
10637 print_operand (FILE *file, rtx x, int code)
10644 if (ASSEMBLER_DIALECT == ASM_ATT)
10649 assemble_name (file, get_some_local_dynamic_name ());
10653 switch (ASSEMBLER_DIALECT)
10660 /* Intel syntax. For absolute addresses, registers should not
10661 be surrounded by braces. */
10665 PRINT_OPERAND (file, x, 0);
10672 gcc_unreachable ();
10675 PRINT_OPERAND (file, x, 0);
10680 if (ASSEMBLER_DIALECT == ASM_ATT)
10685 if (ASSEMBLER_DIALECT == ASM_ATT)
10690 if (ASSEMBLER_DIALECT == ASM_ATT)
10695 if (ASSEMBLER_DIALECT == ASM_ATT)
10700 if (ASSEMBLER_DIALECT == ASM_ATT)
10705 if (ASSEMBLER_DIALECT == ASM_ATT)
10710 /* 387 opcodes don't get size suffixes if the operands are
10712 if (STACK_REG_P (x))
10715 /* Likewise if using Intel opcodes. */
10716 if (ASSEMBLER_DIALECT == ASM_INTEL)
10719 /* This is the size of op from size of operand. */
10720 switch (GET_MODE_SIZE (GET_MODE (x)))
10729 #ifdef HAVE_GAS_FILDS_FISTS
10739 if (GET_MODE (x) == SFmode)
10754 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10758 #ifdef GAS_MNEMONICS
10773 gcc_unreachable ();
10790 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10792 PRINT_OPERAND (file, x, 0);
10793 fputs (", ", file);
10798 /* Little bit of braindamage here. The SSE compare instructions
10799 does use completely different names for the comparisons that the
10800 fp conditional moves. */
10803 switch (GET_CODE (x))
10806 fputs ("eq", file);
10809 fputs ("eq_us", file);
10812 fputs ("lt", file);
10815 fputs ("nge", file);
10818 fputs ("le", file);
10821 fputs ("ngt", file);
10824 fputs ("unord", file);
10827 fputs ("neq", file);
10830 fputs ("neq_oq", file);
10833 fputs ("ge", file);
10836 fputs ("nlt", file);
10839 fputs ("gt", file);
10842 fputs ("nle", file);
10845 fputs ("ord", file);
10848 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
10854 switch (GET_CODE (x))
10858 fputs ("eq", file);
10862 fputs ("lt", file);
10866 fputs ("le", file);
10869 fputs ("unord", file);
10873 fputs ("neq", file);
10877 fputs ("nlt", file);
10881 fputs ("nle", file);
10884 fputs ("ord", file);
10887 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
10893 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10894 if (ASSEMBLER_DIALECT == ASM_ATT)
10896 switch (GET_MODE (x))
10898 case HImode: putc ('w', file); break;
10900 case SFmode: putc ('l', file); break;
10902 case DFmode: putc ('q', file); break;
10903 default: gcc_unreachable ();
10910 if (!COMPARISON_P (x))
10912 output_operand_lossage ("operand is neither a constant nor a "
10913 "condition code, invalid operand code "
10917 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10920 if (!COMPARISON_P (x))
10922 output_operand_lossage ("operand is neither a constant nor a "
10923 "condition code, invalid operand code "
10927 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10928 if (ASSEMBLER_DIALECT == ASM_ATT)
10931 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10934 /* Like above, but reverse condition */
10936 /* Check to see if argument to %c is really a constant
10937 and not a condition code which needs to be reversed. */
10938 if (!COMPARISON_P (x))
10940 output_operand_lossage ("operand is neither a constant nor a "
10941 "condition code, invalid operand "
10945 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10948 if (!COMPARISON_P (x))
10950 output_operand_lossage ("operand is neither a constant nor a "
10951 "condition code, invalid operand "
10955 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10956 if (ASSEMBLER_DIALECT == ASM_ATT)
10959 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
10963 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10967 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
10971 /* It doesn't actually matter what mode we use here, as we're
10972 only going to use this for printing. */
10973 x = adjust_address_nv (x, DImode, 8);
10981 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
10984 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10987 int pred_val = INTVAL (XEXP (x, 0));
10989 if (pred_val < REG_BR_PROB_BASE * 45 / 100
10990 || pred_val > REG_BR_PROB_BASE * 55 / 100)
10992 int taken = pred_val > REG_BR_PROB_BASE / 2;
10993 int cputaken = final_forward_branch_p (current_output_insn) == 0;
10995 /* Emit hints only in the case default branch prediction
10996 heuristics would fail. */
10997 if (taken != cputaken)
10999 /* We use 3e (DS) prefix for taken branches and
11000 2e (CS) prefix for not taken branches. */
11002 fputs ("ds ; ", file);
11004 fputs ("cs ; ", file);
11012 switch (GET_CODE (x))
11015 fputs ("neq", file);
11018 fputs ("eq", file);
11022 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11026 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11030 fputs ("le", file);
11034 fputs ("lt", file);
11037 fputs ("unord", file);
11040 fputs ("ord", file);
11043 fputs ("ueq", file);
11046 fputs ("nlt", file);
11049 fputs ("nle", file);
11052 fputs ("ule", file);
11055 fputs ("ult", file);
11058 fputs ("une", file);
11061 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11068 fputs (" ; ", file);
11075 output_operand_lossage ("invalid operand code '%c'", code);
11080 print_reg (x, code, file);
11082 else if (MEM_P (x))
11084 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11085 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11086 && GET_MODE (x) != BLKmode)
11089 switch (GET_MODE_SIZE (GET_MODE (x)))
11091 case 1: size = "BYTE"; break;
11092 case 2: size = "WORD"; break;
11093 case 4: size = "DWORD"; break;
11094 case 8: size = "QWORD"; break;
11095 case 12: size = "XWORD"; break;
11097 if (GET_MODE (x) == XFmode)
11103 gcc_unreachable ();
11106 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11109 else if (code == 'w')
11111 else if (code == 'k')
11114 fputs (size, file);
11115 fputs (" PTR ", file);
11119 /* Avoid (%rip) for call operands. */
11120 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11121 && !CONST_INT_P (x))
11122 output_addr_const (file, x);
11123 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11124 output_operand_lossage ("invalid constraints for operand");
11126 output_address (x);
11129 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11134 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11135 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11137 if (ASSEMBLER_DIALECT == ASM_ATT)
11139 fprintf (file, "0x%08lx", (long unsigned int) l);
11142 /* These float cases don't actually occur as immediate operands. */
11143 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11147 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11148 fprintf (file, "%s", dstr);
11151 else if (GET_CODE (x) == CONST_DOUBLE
11152 && GET_MODE (x) == XFmode)
11156 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11157 fprintf (file, "%s", dstr);
11162 /* We have patterns that allow zero sets of memory, for instance.
11163 In 64-bit mode, we should probably support all 8-byte vectors,
11164 since we can in fact encode that into an immediate. */
11165 if (GET_CODE (x) == CONST_VECTOR)
11167 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11173 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11175 if (ASSEMBLER_DIALECT == ASM_ATT)
11178 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11179 || GET_CODE (x) == LABEL_REF)
11181 if (ASSEMBLER_DIALECT == ASM_ATT)
11184 fputs ("OFFSET FLAT:", file);
11187 if (CONST_INT_P (x))
11188 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11190 output_pic_addr_const (file, x, code);
11192 output_addr_const (file, x);
11196 /* Print a memory operand whose address is ADDR. */
11199 print_operand_address (FILE *file, rtx addr)
11201 struct ix86_address parts;
11202 rtx base, index, disp;
11204 int ok = ix86_decompose_address (addr, &parts);
11209 index = parts.index;
11211 scale = parts.scale;
11219 if (ASSEMBLER_DIALECT == ASM_ATT)
11221 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11224 gcc_unreachable ();
11227 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11228 if (TARGET_64BIT && !base && !index)
11232 if (GET_CODE (disp) == CONST
11233 && GET_CODE (XEXP (disp, 0)) == PLUS
11234 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11235 symbol = XEXP (XEXP (disp, 0), 0);
11237 if (GET_CODE (symbol) == LABEL_REF
11238 || (GET_CODE (symbol) == SYMBOL_REF
11239 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11242 if (!base && !index)
11244 /* Displacement only requires special attention. */
11246 if (CONST_INT_P (disp))
11248 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11249 fputs ("ds:", file);
11250 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11253 output_pic_addr_const (file, disp, 0);
11255 output_addr_const (file, disp);
11259 if (ASSEMBLER_DIALECT == ASM_ATT)
11264 output_pic_addr_const (file, disp, 0);
11265 else if (GET_CODE (disp) == LABEL_REF)
11266 output_asm_label (disp);
11268 output_addr_const (file, disp);
11273 print_reg (base, 0, file);
11277 print_reg (index, 0, file);
11279 fprintf (file, ",%d", scale);
11285 rtx offset = NULL_RTX;
11289 /* Pull out the offset of a symbol; print any symbol itself. */
11290 if (GET_CODE (disp) == CONST
11291 && GET_CODE (XEXP (disp, 0)) == PLUS
11292 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11294 offset = XEXP (XEXP (disp, 0), 1);
11295 disp = gen_rtx_CONST (VOIDmode,
11296 XEXP (XEXP (disp, 0), 0));
11300 output_pic_addr_const (file, disp, 0);
11301 else if (GET_CODE (disp) == LABEL_REF)
11302 output_asm_label (disp);
11303 else if (CONST_INT_P (disp))
11306 output_addr_const (file, disp);
11312 print_reg (base, 0, file);
11315 if (INTVAL (offset) >= 0)
11317 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11321 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11328 print_reg (index, 0, file);
11330 fprintf (file, "*%d", scale);
11338 output_addr_const_extra (FILE *file, rtx x)
11342 if (GET_CODE (x) != UNSPEC)
11345 op = XVECEXP (x, 0, 0);
11346 switch (XINT (x, 1))
11348 case UNSPEC_GOTTPOFF:
11349 output_addr_const (file, op);
11350 /* FIXME: This might be @TPOFF in Sun ld. */
11351 fputs ("@GOTTPOFF", file);
11354 output_addr_const (file, op);
11355 fputs ("@TPOFF", file);
11357 case UNSPEC_NTPOFF:
11358 output_addr_const (file, op);
11360 fputs ("@TPOFF", file);
11362 fputs ("@NTPOFF", file);
11364 case UNSPEC_DTPOFF:
11365 output_addr_const (file, op);
11366 fputs ("@DTPOFF", file);
11368 case UNSPEC_GOTNTPOFF:
11369 output_addr_const (file, op);
11371 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11372 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11374 fputs ("@GOTNTPOFF", file);
11376 case UNSPEC_INDNTPOFF:
11377 output_addr_const (file, op);
11378 fputs ("@INDNTPOFF", file);
11381 case UNSPEC_MACHOPIC_OFFSET:
11382 output_addr_const (file, op);
11384 machopic_output_function_base_name (file);
11395 /* Split one or more DImode RTL references into pairs of SImode
11396 references. The RTL can be REG, offsettable MEM, integer constant, or
11397 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11398 split and "num" is its length. lo_half and hi_half are output arrays
11399 that parallel "operands". */
11402 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11406 rtx op = operands[num];
11408 /* simplify_subreg refuse to split volatile memory addresses,
11409 but we still have to handle it. */
11412 lo_half[num] = adjust_address (op, SImode, 0);
11413 hi_half[num] = adjust_address (op, SImode, 4);
11417 lo_half[num] = simplify_gen_subreg (SImode, op,
11418 GET_MODE (op) == VOIDmode
11419 ? DImode : GET_MODE (op), 0);
11420 hi_half[num] = simplify_gen_subreg (SImode, op,
11421 GET_MODE (op) == VOIDmode
11422 ? DImode : GET_MODE (op), 4);
11426 /* Split one or more TImode RTL references into pairs of DImode
11427 references. The RTL can be REG, offsettable MEM, integer constant, or
11428 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11429 split and "num" is its length. lo_half and hi_half are output arrays
11430 that parallel "operands". */
11433 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11437 rtx op = operands[num];
11439 /* simplify_subreg refuse to split volatile memory addresses, but we
11440 still have to handle it. */
11443 lo_half[num] = adjust_address (op, DImode, 0);
11444 hi_half[num] = adjust_address (op, DImode, 8);
11448 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11449 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11454 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11455 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11456 is the expression of the binary operation. The output may either be
11457 emitted here, or returned to the caller, like all output_* functions.
11459 There is no guarantee that the operands are the same mode, as they
11460 might be within FLOAT or FLOAT_EXTEND expressions. */
11462 #ifndef SYSV386_COMPAT
11463 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11464 wants to fix the assemblers because that causes incompatibility
11465 with gcc. No-one wants to fix gcc because that causes
11466 incompatibility with assemblers... You can use the option of
11467 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11468 #define SYSV386_COMPAT 1
11472 output_387_binary_op (rtx insn, rtx *operands)
11474 static char buf[40];
11477 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11479 #ifdef ENABLE_CHECKING
11480 /* Even if we do not want to check the inputs, this documents input
11481 constraints. Which helps in understanding the following code. */
11482 if (STACK_REG_P (operands[0])
11483 && ((REG_P (operands[1])
11484 && REGNO (operands[0]) == REGNO (operands[1])
11485 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11486 || (REG_P (operands[2])
11487 && REGNO (operands[0]) == REGNO (operands[2])
11488 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11489 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11492 gcc_assert (is_sse);
11495 switch (GET_CODE (operands[3]))
11498 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11499 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11507 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11508 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11516 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11517 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11525 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11526 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11534 gcc_unreachable ();
11541 strcpy (buf, ssep);
11542 if (GET_MODE (operands[0]) == SFmode)
11543 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11545 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11549 strcpy (buf, ssep + 1);
11550 if (GET_MODE (operands[0]) == SFmode)
11551 strcat (buf, "ss\t{%2, %0|%0, %2}");
11553 strcat (buf, "sd\t{%2, %0|%0, %2}");
11559 switch (GET_CODE (operands[3]))
11563 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11565 rtx temp = operands[2];
11566 operands[2] = operands[1];
11567 operands[1] = temp;
11570 /* know operands[0] == operands[1]. */
11572 if (MEM_P (operands[2]))
11578 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11580 if (STACK_TOP_P (operands[0]))
11581 /* How is it that we are storing to a dead operand[2]?
11582 Well, presumably operands[1] is dead too. We can't
11583 store the result to st(0) as st(0) gets popped on this
11584 instruction. Instead store to operands[2] (which I
11585 think has to be st(1)). st(1) will be popped later.
11586 gcc <= 2.8.1 didn't have this check and generated
11587 assembly code that the Unixware assembler rejected. */
11588 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11590 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11594 if (STACK_TOP_P (operands[0]))
11595 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11597 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11602 if (MEM_P (operands[1]))
11608 if (MEM_P (operands[2]))
11614 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11617 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11618 derived assemblers, confusingly reverse the direction of
11619 the operation for fsub{r} and fdiv{r} when the
11620 destination register is not st(0). The Intel assembler
11621 doesn't have this brain damage. Read !SYSV386_COMPAT to
11622 figure out what the hardware really does. */
11623 if (STACK_TOP_P (operands[0]))
11624 p = "{p\t%0, %2|rp\t%2, %0}";
11626 p = "{rp\t%2, %0|p\t%0, %2}";
11628 if (STACK_TOP_P (operands[0]))
11629 /* As above for fmul/fadd, we can't store to st(0). */
11630 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11632 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11637 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11640 if (STACK_TOP_P (operands[0]))
11641 p = "{rp\t%0, %1|p\t%1, %0}";
11643 p = "{p\t%1, %0|rp\t%0, %1}";
11645 if (STACK_TOP_P (operands[0]))
11646 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11648 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11653 if (STACK_TOP_P (operands[0]))
11655 if (STACK_TOP_P (operands[1]))
11656 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11658 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11661 else if (STACK_TOP_P (operands[1]))
11664 p = "{\t%1, %0|r\t%0, %1}";
11666 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11672 p = "{r\t%2, %0|\t%0, %2}";
11674 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11680 gcc_unreachable ();
11687 /* Return needed mode for entity in optimize_mode_switching pass. */
11690 ix86_mode_needed (int entity, rtx insn)
11692 enum attr_i387_cw mode;
11694 /* The mode UNINITIALIZED is used to store control word after a
11695 function call or ASM pattern. The mode ANY specify that function
11696 has no requirements on the control word and make no changes in the
11697 bits we are interested in. */
11700 || (NONJUMP_INSN_P (insn)
11701 && (asm_noperands (PATTERN (insn)) >= 0
11702 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11703 return I387_CW_UNINITIALIZED;
11705 if (recog_memoized (insn) < 0)
11706 return I387_CW_ANY;
11708 mode = get_attr_i387_cw (insn);
11713 if (mode == I387_CW_TRUNC)
11718 if (mode == I387_CW_FLOOR)
11723 if (mode == I387_CW_CEIL)
11728 if (mode == I387_CW_MASK_PM)
11733 gcc_unreachable ();
11736 return I387_CW_ANY;
11739 /* Output code to initialize control word copies used by trunc?f?i and
11740 rounding patterns. CURRENT_MODE is set to current control word,
11741 while NEW_MODE is set to new control word. */
11744 emit_i387_cw_initialization (int mode)
11746 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11749 enum ix86_stack_slot slot;
11751 rtx reg = gen_reg_rtx (HImode);
11753 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11754 emit_move_insn (reg, copy_rtx (stored_mode));
11756 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11757 || optimize_function_for_size_p (cfun))
11761 case I387_CW_TRUNC:
11762 /* round toward zero (truncate) */
11763 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11764 slot = SLOT_CW_TRUNC;
11767 case I387_CW_FLOOR:
11768 /* round down toward -oo */
11769 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11770 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11771 slot = SLOT_CW_FLOOR;
11775 /* round up toward +oo */
11776 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11777 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11778 slot = SLOT_CW_CEIL;
11781 case I387_CW_MASK_PM:
11782 /* mask precision exception for nearbyint() */
11783 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11784 slot = SLOT_CW_MASK_PM;
11788 gcc_unreachable ();
11795 case I387_CW_TRUNC:
11796 /* round toward zero (truncate) */
11797 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11798 slot = SLOT_CW_TRUNC;
11801 case I387_CW_FLOOR:
11802 /* round down toward -oo */
11803 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11804 slot = SLOT_CW_FLOOR;
11808 /* round up toward +oo */
11809 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11810 slot = SLOT_CW_CEIL;
11813 case I387_CW_MASK_PM:
11814 /* mask precision exception for nearbyint() */
11815 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11816 slot = SLOT_CW_MASK_PM;
11820 gcc_unreachable ();
11824 gcc_assert (slot < MAX_386_STACK_LOCALS);
11826 new_mode = assign_386_stack_local (HImode, slot);
11827 emit_move_insn (new_mode, reg);
11830 /* Output code for INSN to convert a float to a signed int. OPERANDS
11831 are the insn operands. The output may be [HSD]Imode and the input
11832 operand may be [SDX]Fmode. */
11835 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11837 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11838 int dimode_p = GET_MODE (operands[0]) == DImode;
11839 int round_mode = get_attr_i387_cw (insn);
11841 /* Jump through a hoop or two for DImode, since the hardware has no
11842 non-popping instruction. We used to do this a different way, but
11843 that was somewhat fragile and broke with post-reload splitters. */
11844 if ((dimode_p || fisttp) && !stack_top_dies)
11845 output_asm_insn ("fld\t%y1", operands);
11847 gcc_assert (STACK_TOP_P (operands[1]));
11848 gcc_assert (MEM_P (operands[0]));
11849 gcc_assert (GET_MODE (operands[1]) != TFmode);
11852 output_asm_insn ("fisttp%z0\t%0", operands);
11855 if (round_mode != I387_CW_ANY)
11856 output_asm_insn ("fldcw\t%3", operands);
11857 if (stack_top_dies || dimode_p)
11858 output_asm_insn ("fistp%z0\t%0", operands);
11860 output_asm_insn ("fist%z0\t%0", operands);
11861 if (round_mode != I387_CW_ANY)
11862 output_asm_insn ("fldcw\t%2", operands);
11868 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11869 have the values zero or one, indicates the ffreep insn's operand
11870 from the OPERANDS array. */
11872 static const char *
11873 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11875 if (TARGET_USE_FFREEP)
11876 #if HAVE_AS_IX86_FFREEP
11877 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
11880 static char retval[] = ".word\t0xc_df";
11881 int regno = REGNO (operands[opno]);
11883 gcc_assert (FP_REGNO_P (regno));
11885 retval[9] = '0' + (regno - FIRST_STACK_REG);
11890 return opno ? "fstp\t%y1" : "fstp\t%y0";
11894 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11895 should be used. UNORDERED_P is true when fucom should be used. */
11898 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11900 int stack_top_dies;
11901 rtx cmp_op0, cmp_op1;
11902 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
11906 cmp_op0 = operands[0];
11907 cmp_op1 = operands[1];
11911 cmp_op0 = operands[1];
11912 cmp_op1 = operands[2];
11917 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
11918 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
11919 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
11920 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
11922 if (GET_MODE (operands[0]) == SFmode)
11924 return &ucomiss[TARGET_AVX ? 0 : 1];
11926 return &comiss[TARGET_AVX ? 0 : 1];
11929 return &ucomisd[TARGET_AVX ? 0 : 1];
11931 return &comisd[TARGET_AVX ? 0 : 1];
11934 gcc_assert (STACK_TOP_P (cmp_op0));
11936 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11938 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11940 if (stack_top_dies)
11942 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11943 return output_387_ffreep (operands, 1);
11946 return "ftst\n\tfnstsw\t%0";
11949 if (STACK_REG_P (cmp_op1)
11951 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11952 && REGNO (cmp_op1) != FIRST_STACK_REG)
11954 /* If both the top of the 387 stack dies, and the other operand
11955 is also a stack register that dies, then this must be a
11956 `fcompp' float compare */
11960 /* There is no double popping fcomi variant. Fortunately,
11961 eflags is immune from the fstp's cc clobbering. */
11963 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11965 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11966 return output_387_ffreep (operands, 0);
11971 return "fucompp\n\tfnstsw\t%0";
11973 return "fcompp\n\tfnstsw\t%0";
11978 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
11980 static const char * const alt[16] =
11982 "fcom%z2\t%y2\n\tfnstsw\t%0",
11983 "fcomp%z2\t%y2\n\tfnstsw\t%0",
11984 "fucom%z2\t%y2\n\tfnstsw\t%0",
11985 "fucomp%z2\t%y2\n\tfnstsw\t%0",
11987 "ficom%z2\t%y2\n\tfnstsw\t%0",
11988 "ficomp%z2\t%y2\n\tfnstsw\t%0",
11992 "fcomi\t{%y1, %0|%0, %y1}",
11993 "fcomip\t{%y1, %0|%0, %y1}",
11994 "fucomi\t{%y1, %0|%0, %y1}",
11995 "fucomip\t{%y1, %0|%0, %y1}",
12006 mask = eflags_p << 3;
12007 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12008 mask |= unordered_p << 1;
12009 mask |= stack_top_dies;
12011 gcc_assert (mask < 16);
12020 ix86_output_addr_vec_elt (FILE *file, int value)
12022 const char *directive = ASM_LONG;
12026 directive = ASM_QUAD;
12028 gcc_assert (!TARGET_64BIT);
12031 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12035 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12037 const char *directive = ASM_LONG;
12040 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12041 directive = ASM_QUAD;
12043 gcc_assert (!TARGET_64BIT);
12045 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12046 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12047 fprintf (file, "%s%s%d-%s%d\n",
12048 directive, LPREFIX, value, LPREFIX, rel);
12049 else if (HAVE_AS_GOTOFF_IN_DATA)
12050 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12052 else if (TARGET_MACHO)
12054 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12055 machopic_output_function_base_name (file);
12056 fprintf(file, "\n");
12060 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12061 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12064 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12068 ix86_expand_clear (rtx dest)
12072 /* We play register width games, which are only valid after reload. */
12073 gcc_assert (reload_completed);
12075 /* Avoid HImode and its attendant prefix byte. */
12076 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12077 dest = gen_rtx_REG (SImode, REGNO (dest));
12078 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12080 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12081 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12083 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12084 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12090 /* X is an unchanging MEM. If it is a constant pool reference, return
12091 the constant pool rtx, else NULL. */
12094 maybe_get_pool_constant (rtx x)
12096 x = ix86_delegitimize_address (XEXP (x, 0));
12098 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12099 return get_pool_constant (x);
12105 ix86_expand_move (enum machine_mode mode, rtx operands[])
12108 enum tls_model model;
12113 if (GET_CODE (op1) == SYMBOL_REF)
12115 model = SYMBOL_REF_TLS_MODEL (op1);
12118 op1 = legitimize_tls_address (op1, model, true);
12119 op1 = force_operand (op1, op0);
12123 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12124 && SYMBOL_REF_DLLIMPORT_P (op1))
12125 op1 = legitimize_dllimport_symbol (op1, false);
12127 else if (GET_CODE (op1) == CONST
12128 && GET_CODE (XEXP (op1, 0)) == PLUS
12129 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12131 rtx addend = XEXP (XEXP (op1, 0), 1);
12132 rtx symbol = XEXP (XEXP (op1, 0), 0);
12135 model = SYMBOL_REF_TLS_MODEL (symbol);
12137 tmp = legitimize_tls_address (symbol, model, true);
12138 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12139 && SYMBOL_REF_DLLIMPORT_P (symbol))
12140 tmp = legitimize_dllimport_symbol (symbol, true);
12144 tmp = force_operand (tmp, NULL);
12145 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12146 op0, 1, OPTAB_DIRECT);
12152 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12154 if (TARGET_MACHO && !TARGET_64BIT)
12159 rtx temp = ((reload_in_progress
12160 || ((op0 && REG_P (op0))
12162 ? op0 : gen_reg_rtx (Pmode));
12163 op1 = machopic_indirect_data_reference (op1, temp);
12164 op1 = machopic_legitimize_pic_address (op1, mode,
12165 temp == op1 ? 0 : temp);
12167 else if (MACHOPIC_INDIRECT)
12168 op1 = machopic_indirect_data_reference (op1, 0);
12176 op1 = force_reg (Pmode, op1);
12177 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12179 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12180 op1 = legitimize_pic_address (op1, reg);
12189 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12190 || !push_operand (op0, mode))
12192 op1 = force_reg (mode, op1);
12194 if (push_operand (op0, mode)
12195 && ! general_no_elim_operand (op1, mode))
12196 op1 = copy_to_mode_reg (mode, op1);
12198 /* Force large constants in 64bit compilation into register
12199 to get them CSEed. */
12200 if (can_create_pseudo_p ()
12201 && (mode == DImode) && TARGET_64BIT
12202 && immediate_operand (op1, mode)
12203 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12204 && !register_operand (op0, mode)
12206 op1 = copy_to_mode_reg (mode, op1);
12208 if (can_create_pseudo_p ()
12209 && FLOAT_MODE_P (mode)
12210 && GET_CODE (op1) == CONST_DOUBLE)
12212 /* If we are loading a floating point constant to a register,
12213 force the value to memory now, since we'll get better code
12214 out the back end. */
12216 op1 = validize_mem (force_const_mem (mode, op1));
12217 if (!register_operand (op0, mode))
12219 rtx temp = gen_reg_rtx (mode);
12220 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12221 emit_move_insn (op0, temp);
12227 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12231 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12233 rtx op0 = operands[0], op1 = operands[1];
12234 unsigned int align = GET_MODE_ALIGNMENT (mode);
12236 /* Force constants other than zero into memory. We do not know how
12237 the instructions used to build constants modify the upper 64 bits
12238 of the register, once we have that information we may be able
12239 to handle some of them more efficiently. */
12240 if (can_create_pseudo_p ()
12241 && register_operand (op0, mode)
12242 && (CONSTANT_P (op1)
12243 || (GET_CODE (op1) == SUBREG
12244 && CONSTANT_P (SUBREG_REG (op1))))
12245 && standard_sse_constant_p (op1) <= 0)
12246 op1 = validize_mem (force_const_mem (mode, op1));
12248 /* We need to check memory alignment for SSE mode since attribute
12249 can make operands unaligned. */
12250 if (can_create_pseudo_p ()
12251 && SSE_REG_MODE_P (mode)
12252 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12253 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12257 /* ix86_expand_vector_move_misalign() does not like constants ... */
12258 if (CONSTANT_P (op1)
12259 || (GET_CODE (op1) == SUBREG
12260 && CONSTANT_P (SUBREG_REG (op1))))
12261 op1 = validize_mem (force_const_mem (mode, op1));
12263 /* ... nor both arguments in memory. */
12264 if (!register_operand (op0, mode)
12265 && !register_operand (op1, mode))
12266 op1 = force_reg (mode, op1);
12268 tmp[0] = op0; tmp[1] = op1;
12269 ix86_expand_vector_move_misalign (mode, tmp);
12273 /* Make operand1 a register if it isn't already. */
12274 if (can_create_pseudo_p ()
12275 && !register_operand (op0, mode)
12276 && !register_operand (op1, mode))
12278 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12282 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12285 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12286 straight to ix86_expand_vector_move. */
12287 /* Code generation for scalar reg-reg moves of single and double precision data:
12288 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12292 if (x86_sse_partial_reg_dependency == true)
12297 Code generation for scalar loads of double precision data:
12298 if (x86_sse_split_regs == true)
12299 movlpd mem, reg (gas syntax)
12303 Code generation for unaligned packed loads of single precision data
12304 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12305 if (x86_sse_unaligned_move_optimal)
12308 if (x86_sse_partial_reg_dependency == true)
12320 Code generation for unaligned packed loads of double precision data
12321 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12322 if (x86_sse_unaligned_move_optimal)
12325 if (x86_sse_split_regs == true)
/* Expand an unaligned ("misaligned") vector move.  One of operands[0]
   (destination) / operands[1] (source) is a MEM that may not be naturally
   aligned; pick the cheapest instruction sequence for the current tuning.
   NOTE(review): this listing is elided -- braces, declarations and some
   case labels / TARGET_AVX dispatch lines are not visible here.  */
12338 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (presumably guarded by TARGET_AVX in elided lines): use the
   unaligned AVX moves directly, dispatching on mode class and size.  */
12347 switch (GET_MODE_CLASS (mode))
12349 case MODE_VECTOR_INT:
12351 switch (GET_MODE_SIZE (mode))
/* 16-byte integer vectors: punning to V16QI lets one movdqu pattern
   serve all element widths.  */
12354 op0 = gen_lowpart (V16QImode, op0);
12355 op1 = gen_lowpart (V16QImode, op1);
12356 emit_insn (gen_avx_movdqu (op0, op1));
/* 32-byte integer vectors use the 256-bit movdqu.  */
12359 op0 = gen_lowpart (V32QImode, op0);
12360 op1 = gen_lowpart (V32QImode, op1);
12361 emit_insn (gen_avx_movdqu256 (op0, op1));
12364 gcc_unreachable ();
12367 case MODE_VECTOR_FLOAT:
12368 op0 = gen_lowpart (mode, op0);
12369 op1 = gen_lowpart (mode, op1);
/* Float vectors: unaligned packed single/double moves, 128- or 256-bit
   (the mode dispatch lines are elided from this listing).  */
12374 emit_insn (gen_avx_movups (op0, op1));
12377 emit_insn (gen_avx_movups256 (op0, op1));
12380 emit_insn (gen_avx_movupd (op0, op1));
12383 emit_insn (gen_avx_movupd256 (op0, op1));
12386 gcc_unreachable ();
12391 gcc_unreachable ();
/* Non-AVX load path (presumably MEM_P (op1); guard is elided).  */
12399 /* If we're optimizing for size, movups is the smallest. */
12400 if (optimize_insn_for_size_p ())
12402 op0 = gen_lowpart (V4SFmode, op0);
12403 op1 = gen_lowpart (V4SFmode, op1);
12404 emit_insn (gen_sse_movups (op0, op1));
12408 /* ??? If we have typed data, then it would appear that using
12409 movdqu is the only way to get unaligned data loaded with
12411 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12413 op0 = gen_lowpart (V16QImode, op0);
12414 op1 = gen_lowpart (V16QImode, op1);
12415 emit_insn (gen_sse2_movdqu (op0, op1));
12419 if (TARGET_SSE2 && mode == V2DFmode)
/* Tuning says a single unaligned movupd is cheapest on this CPU.  */
12423 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12425 op0 = gen_lowpart (V2DFmode, op0);
12426 op1 = gen_lowpart (V2DFmode, op1);
12427 emit_insn (gen_sse2_movupd (op0, op1));
12431 /* When SSE registers are split into halves, we can avoid
12432 writing to the top half twice. */
12433 if (TARGET_SSE_SPLIT_REGS)
/* Clobber tells the register allocator the old value is dead, so the
   two half-loads below don't create a false dependency.  */
12435 emit_clobber (op0);
12440 /* ??? Not sure about the best option for the Intel chips.
12441 The following would seem to satisfy; the register is
12442 entirely cleared, breaking the dependency chain. We
12443 then store to the upper half, with a dependency depth
12444 of one. A rumor has it that Intel recommends two movsd
12445 followed by an unpacklpd, but this is unconfirmed. And
12446 given that the dependency depth of the unpacklpd would
12447 still be one, I'm not sure why this would be better. */
12448 zero = CONST0_RTX (V2DFmode);
/* Load the two DFmode halves separately (offsets 0 and 8 bytes).  */
12451 m = adjust_address (op1, DFmode, 0);
12452 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12453 m = adjust_address (op1, DFmode, 8);
12454 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12458 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12460 op0 = gen_lowpart (V4SFmode, op0);
12461 op1 = gen_lowpart (V4SFmode, op1);
12462 emit_insn (gen_sse_movups (op0, op1));
/* Break the partial-register dependency either by zeroing the whole
   register or just clobbering it, per tuning.  */
12466 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12467 emit_move_insn (op0, CONST0_RTX (mode));
12469 emit_clobber (op0);
12471 if (mode != V4SFmode)
12472 op0 = gen_lowpart (V4SFmode, op0);
/* Load as two V2SF halves via movlps/movhps.  */
12473 m = adjust_address (op1, V2SFmode, 0);
12474 emit_insn (gen_sse_loadlps (op0, op0, m));
12475 m = adjust_address (op1, V2SFmode, 8);
12476 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX store path: destination is memory.  */
12479 else if (MEM_P (op0))
12481 /* If we're optimizing for size, movups is the smallest. */
12482 if (optimize_insn_for_size_p ())
12484 op0 = gen_lowpart (V4SFmode, op0);
12485 op1 = gen_lowpart (V4SFmode, op1);
12486 emit_insn (gen_sse_movups (op0, op1));
12490 /* ??? Similar to above, only less clear because of quote
12491 typeless stores unquote. */
12492 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12493 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12495 op0 = gen_lowpart (V16QImode, op0);
12496 op1 = gen_lowpart (V16QImode, op1);
12497 emit_insn (gen_sse2_movdqu (op0, op1));
12501 if (TARGET_SSE2 && mode == V2DFmode)
/* Store as two DFmode halves; stores have no dependency issue.  */
12503 m = adjust_address (op0, DFmode, 0);
12504 emit_insn (gen_sse2_storelpd (m, op1));
12505 m = adjust_address (op0, DFmode, 8);
12506 emit_insn (gen_sse2_storehpd (m, op1));
12510 if (mode != V4SFmode)
12511 op1 = gen_lowpart (V4SFmode, op1);
12512 m = adjust_address (op0, V2SFmode, 0);
12513 emit_insn (gen_sse_storelps (m, op1));
12514 m = adjust_address (op0, V2SFmode, 8);
12515 emit_insn (gen_sse_storehps (m, op1));
12519 gcc_unreachable ();
12522 /* Expand a push in MODE. This is some mode for which we do not support
12523 proper push instructions, at least from the registers that we expect
12524 the value to live in. */
12527 ix86_expand_push (enum machine_mode mode, rtx x)
/* Decrement the stack pointer by the size of MODE ...  */
12531 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12532 GEN_INT (-GET_MODE_SIZE (mode)),
12533 stack_pointer_rtx, 1, OPTAB_DIRECT);
/* expand_simple_binop may have produced a fresh pseudo; copy it back
   into the real stack pointer if so.  */
12534 if (tmp != stack_pointer_rtx)
12535 emit_move_insn (stack_pointer_rtx, tmp);
/* ... then store X at the new top of stack.  */
12537 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12538 emit_move_insn (tmp, x);
12541 /* Helper function of ix86_fixup_binary_operands to canonicalize
12542 operand order. Returns true if the operands should be swapped. */
12545 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12548 rtx dst = operands[0];
12549 rtx src1 = operands[1];
12550 rtx src2 = operands[2];
12552 /* If the operation is not commutative, we can't do anything. */
12553 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12556 /* Highest priority is that src1 should match dst. */
12557 if (rtx_equal_p (dst, src1))
12559 if (rtx_equal_p (dst, src2))
12562 /* Next highest priority is that immediate constants come second. */
12563 if (immediate_operand (src2, mode))
12565 if (immediate_operand (src1, mode))
12568 /* Lowest priority is that memory references should come second. */
/* NOTE(review): the return statements for each branch are elided from
   this listing; the priority order above is what is visible.  */
12578 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12579 destination to use for the operation. If different from the true
12580 destination in operands[0], a copy operation will be required. */
12583 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12586 rtx dst = operands[0];
12587 rtx src1 = operands[1];
12588 rtx src2 = operands[2];
12590 /* Canonicalize operand order. */
12591 if (ix86_swap_binary_operands_p (code, mode, operands))
12595 /* It is invalid to swap operands of different modes. */
12596 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
/* x86 two-address form: only one operand may be a memory reference.  */
12603 /* Both source operands cannot be in memory. */
12604 if (MEM_P (src1) && MEM_P (src2))
12606 /* Optimization: Only read from memory once. */
12607 if (rtx_equal_p (src1, src2))
12609 src2 = force_reg (mode, src2);
12613 src2 = force_reg (mode, src2);
12616 /* If the destination is memory, and we do not have matching source
12617 operands, do things in registers. */
12618 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
/* Caller must copy this temporary back to operands[0].  */
12619 dst = gen_reg_rtx (mode);
12621 /* Source 1 cannot be a constant. */
12622 if (CONSTANT_P (src1))
12623 src1 = force_reg (mode, src1);
12625 /* Source 1 cannot be a non-matching memory. */
12626 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12627 src1 = force_reg (mode, src1);
/* Write back the (possibly legitimized) sources; dst is returned.  */
12629 operands[1] = src1;
12630 operands[2] = src2;
12634 /* Similarly, but assume that the destination has already been
12635 set up properly. */
12638 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12639 enum machine_mode mode, rtx operands[])
/* The assert enforces that no temporary destination was needed --
   i.e. the caller already guaranteed a valid two-address form.  */
12641 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12642 gcc_assert (dst == operands[0]);
12645 /* Attempt to expand a binary operator. Make the expansion closer to the
12646 actual machine, then just general_operand, which will allow 3 separate
12647 memory references (one output, two input) in a single insn. */
12650 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12653 rtx src1, src2, dst, op, clob;
12655 dst = ix86_fixup_binary_operands (code, mode, operands);
12656 src1 = operands[1];
12657 src2 = operands[2];
12659 /* Emit the instruction. */
12661 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12662 if (reload_in_progress)
12664 /* Reload doesn't know about the flags register, and doesn't know that
12665 it doesn't want to clobber it. We can only do this with PLUS. */
12666 gcc_assert (code == PLUS);
/* Normal path: wrap the SET in a PARALLEL with the FLAGS_REG clobber
   that nearly all x86 arithmetic insn patterns require.  */
12671 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12672 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12675 /* Fix up the destination if needed. */
12676 if (dst != operands[0])
12677 emit_move_insn (operands[0], dst);
12680 /* Return TRUE or FALSE depending on whether the binary operator meets the
12681 appropriate constraints. */
12684 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12687 rtx dst = operands[0];
12688 rtx src1 = operands[1];
12689 rtx src2 = operands[2];
12691 /* Both source operands cannot be in memory. */
12692 if (MEM_P (src1) && MEM_P (src2))
12695 /* Canonicalize operand order for commutative operators. */
/* Mirrors the swap done by ix86_fixup_binary_operands so the checks
   below see the same operand order the expander would produce.  */
12696 if (ix86_swap_binary_operands_p (code, mode, operands))
12703 /* If the destination is memory, we must have a matching source operand. */
12704 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12707 /* Source 1 cannot be a constant. */
12708 if (CONSTANT_P (src1))
12711 /* Source 1 cannot be a non-matching memory. */
12712 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
/* NOTE(review): the per-branch return values are elided here.  */
12718 /* Attempt to expand a unary operator. Make the expansion closer to the
12719 actual machine, then just general_operand, which will allow 2 separate
12720 memory references (one output, one input) in a single insn. */
12723 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12726 int matching_memory;
12727 rtx src, dst, op, clob;
12732 /* If the destination is memory, and we do not have matching source
12733 operands, do things in registers. */
12734 matching_memory = 0;
12737 if (rtx_equal_p (dst, src))
12738 matching_memory = 1;
/* Otherwise compute into a fresh pseudo and copy back at the end.  */
12740 dst = gen_reg_rtx (mode);
12743 /* When source operand is memory, destination must match. */
12744 if (MEM_P (src) && !matching_memory)
12745 src = force_reg (mode, src);
12747 /* Emit the instruction. */
12749 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12750 if (reload_in_progress || code == NOT)
12752 /* Reload doesn't know about the flags register, and doesn't know that
12753 it doesn't want to clobber it. */
/* NOT is the one unary op whose pattern does not clobber flags, so it
   is emitted without the PARALLEL wrapper.  */
12754 gcc_assert (code == NOT);
12759 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12760 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12763 /* Fix up the destination if needed. */
12764 if (dst != operands[0])
12765 emit_move_insn (operands[0], dst);
12768 /* Return TRUE or FALSE depending on whether the unary operator meets the
12769 appropriate constraints. */
12772 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12773 enum machine_mode mode ATTRIBUTE_UNUSED,
12774 rtx operands[2] ATTRIBUTE_UNUSED)
12776 /* If one of operands is memory, source and destination must match. */
12777 if ((MEM_P (operands[0])
12778 || MEM_P (operands[1]))
12779 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): the FALSE/TRUE returns are elided from this listing.  */
12784 /* Post-reload splitter for converting an SF or DFmode value in an
12785 SSE register into an unsigned SImode. */
12788 ix86_split_convert_uns_si_sse (rtx operands[])
12790 enum machine_mode vecmode;
12791 rtx value, large, zero_or_two31, input, two31, x;
/* operands: 0 = result, 1 = scratch "large" mask, 2 = scratch holding
   zero-or-2^31, 3 = input value, 4 = the constant 2^31.  */
12793 large = operands[1];
12794 zero_or_two31 = operands[2];
12795 input = operands[3];
12796 two31 = operands[4];
12797 vecmode = GET_MODE (large);
12798 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12800 /* Load up the value into the low element. We must ensure that the other
12801 elements are valid floats -- zero is the easiest such value. */
12804 if (vecmode == V4SFmode)
12805 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12807 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register-input variant (guard elided): zero the vector then move the
   scalar into the low element with movss/movsd.  */
12811 input = gen_rtx_REG (vecmode, REGNO (input));
12812 emit_move_insn (value, CONST0_RTX (vecmode));
12813 if (vecmode == V4SFmode)
12814 emit_insn (gen_sse_movss (value, value, input));
12816 emit_insn (gen_sse2_movsd (value, value, input));
12819 emit_move_insn (large, two31);
12820 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2^31 <= value) as an all-ones/all-zeros vector mask.  */
12822 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12823 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = mask ? 2^31 : 0.  */
12825 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12826 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* value -= zero_or_two31: bring values >= 2^31 into signed range.  */
12828 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12829 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into just the sign bit (bit 31) per lane ...  */
12831 large = gen_rtx_REG (V4SImode, REGNO (large));
12832 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12834 x = gen_rtx_REG (V4SImode, REGNO (value));
12835 if (vecmode == V4SFmode)
12836 emit_insn (gen_sse2_cvttps2dq (x, value));
12838 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* ... and XOR it back in to re-add 2^31 to the truncated result.  */
12841 emit_insn (gen_xorv4si3 (value, value, large));
12844 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12845 Expects the 64-bit DImode to be supplied in a pair of integral
12846 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12847 -mfpmath=sse, !optimize_size only. */
12850 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12852 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12853 rtx int_xmm, fp_xmm;
12854 rtx biases, exponents;
/* Get the 64-bit input into the low half of an XMM register, choosing
   the cheapest method the tuning flags allow.  */
12857 int_xmm = gen_reg_rtx (V4SImode);
12858 if (TARGET_INTER_UNIT_MOVES)
12859 emit_insn (gen_movdi_to_sse (int_xmm, input));
12860 else if (TARGET_SSE_SPLIT_REGS)
12862 emit_clobber (int_xmm);
12863 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12867 x = gen_reg_rtx (V2DImode);
12868 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12869 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words: 0x43300000 = high word of 0x1.0p52,
   0x45300000 = high word of 0x1.0p84.  */
12872 x = gen_rtx_CONST_VECTOR (V4SImode,
12873 gen_rtvec (4, GEN_INT (0x43300000UL),
12874 GEN_INT (0x45300000UL),
12875 const0_rtx, const0_rtx));
12876 exponents = validize_mem (force_const_mem (V4SImode, x));
12878 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12879 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12881 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12882 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12883 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12884 (0x1.0p84 + double(fp_value_hi_xmm)).
12885 Note these exponents differ by 32. */
12887 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12889 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12890 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12891 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12892 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12893 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12894 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12895 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12896 biases = validize_mem (force_const_mem (V2DFmode, biases));
12897 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12899 /* Add the upper and lower DFmode values together. */
/* SSE3 haddpd does it in one insn; otherwise unpack-high and add
   (the TARGET_SSE3 guard is elided from this listing).  */
12901 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
12904 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12905 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12906 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
12909 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12912 /* Not used, but eases macroization of patterns. */
12914 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12915 rtx input ATTRIBUTE_UNUSED)
/* Stub only: exists so machine-description macros can name it.  */
12917 gcc_unreachable ();
12920 /* Convert an unsigned SImode value into a DFmode. Only currently used
12921 for SSE, but applicable anywhere. */
12924 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12926 REAL_VALUE_TYPE TWO31r;
/* x = input - 2^31 (as a signed value), so the signed int->double
   conversion below is exact for the full unsigned range.  */
12929 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12930 NULL, 1, OPTAB_DIRECT);
12932 fp = gen_reg_rtx (DFmode);
12933 emit_insn (gen_floatsidf2 (fp, x));
/* Add 2^31 back in the DFmode domain to undo the bias.  */
12935 real_ldexp (&TWO31r, &dconst1, 31);
12936 x = const_double_from_real_value (TWO31r, DFmode);
12938 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
12940 emit_move_insn (target, x);
12943 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12944 32-bit mode; otherwise we have a direct convert instruction. */
12947 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12949 REAL_VALUE_TYPE TWO32r;
12950 rtx fp_lo, fp_hi, x;
12952 fp_lo = gen_reg_rtx (DFmode);
12953 fp_hi = gen_reg_rtx (DFmode);
/* High 32 bits are signed: convert directly, then scale by 2^32.  */
12955 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
12957 real_ldexp (&TWO32r, &dconst1, 32);
12958 x = const_double_from_real_value (TWO32r, DFmode);
12959 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low 32 bits are unsigned: use the unsigned SI->DF helper.  */
12961 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12963 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12966 emit_move_insn (target, x);
12969 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12970 For x86_32, -mfpmath=sse, !optimize_size only. */
12972 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12974 REAL_VALUE_TYPE ONE16r;
12975 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split input into 16-bit halves; each half converts exactly to SF.  */
12977 real_ldexp (&ONE16r, &dconst1, 16);
12978 x = const_double_from_real_value (ONE16r, SFmode);
12979 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12980 NULL, 0, OPTAB_DIRECT);
12981 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12982 NULL, 0, OPTAB_DIRECT);
12983 fp_hi = gen_reg_rtx (SFmode);
12984 fp_lo = gen_reg_rtx (SFmode);
12985 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12986 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
/* Recombine: result = fp_hi * 2^16 + fp_lo.  */
12987 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12989 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12991 if (!rtx_equal_p (target, fp_hi))
12992 emit_move_insn (target, fp_hi);
12995 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
12996 then replicate the value for all elements of the vector
13000 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* Integer element modes: always replicate (mode dispatch elided).  */
13007 v = gen_rtvec (4, value, value, value, value);
13008 return gen_rtx_CONST_VECTOR (V4SImode, v);
13012 v = gen_rtvec (2, value, value);
13013 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* Float element modes: replicate when VECT, otherwise put VALUE in the
   low element and zero-fill the rest.  */
13017 v = gen_rtvec (4, value, value, value, value);
13019 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13020 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13021 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13025 v = gen_rtvec (2, value, value);
13027 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13028 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13031 gcc_unreachable ();
13035 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13036 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13037 for an SSE register. If VECT is true, then replicate the mask for
13038 all elements of the vector register. If INVERT is true, then create
13039 a mask excluding the sign bit. */
13042 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13044 enum machine_mode vec_mode, imode;
13045 HOST_WIDE_INT hi, lo;
13050 /* Find the sign bit, sign extended to 2*HWI. */
/* SF/SI case: sign bit is bit 31.  */
13056 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13057 lo = 0x80000000, hi = lo < 0;
/* DF/DI case: sign bit is bit 63; build it across the lo/hi pair when
   HOST_WIDE_INT is only 32 bits.  */
13063 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13064 if (HOST_BITS_PER_WIDE_INT >= 64)
13065 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13067 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF/XF-style case (labels elided): no vector mode; the mask lives in
   an integer constant wider than one HWI.  */
13072 vec_mode = VOIDmode;
13073 if (HOST_BITS_PER_WIDE_INT >= 64)
13076 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13083 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13087 lo = ~lo, hi = ~hi;
13093 mask = immed_double_const (lo, hi, imode);
13095 vec = gen_rtvec (2, v, mask);
13096 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13097 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13104 gcc_unreachable ();
/* INVERT: complement the mask so it selects everything but the sign.  */
13108 lo = ~lo, hi = ~hi;
13110 /* Force this value into the low part of a fp vector constant. */
13111 mask = immed_double_const (lo, hi, imode);
13112 mask = gen_lowpart (mode, mask);
13114 if (vec_mode == VOIDmode)
13115 return force_reg (mode, mask);
13117 v = ix86_build_const_vector (mode, vect, mask);
13118 return force_reg (vec_mode, v);
13121 /* Generate code for floating point ABS or NEG. */
13124 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13127 rtx mask, set, use, clob, dst, src;
13128 bool use_sse = false;
13129 bool vector_mode = VECTOR_MODE_P (mode);
13130 enum machine_mode elt_mode = mode;
/* Decide whether this goes through SSE bitwise ops or x87.  */
13134 elt_mode = GET_MODE_INNER (mode);
13137 else if (mode == TFmode)
13139 else if (TARGET_SSE_MATH)
13140 use_sse = SSE_FLOAT_MODE_P (mode);
13142 /* NEG and ABS performed with SSE use bitwise mask operations.
13143 Create the appropriate mask now. */
/* NEG flips the sign bit (XOR with sign mask); ABS clears it (AND with
   the inverted mask), hence INVERT = (code == ABS).  */
13145 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13154 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13155 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: emit the abs/neg rtx with a USE of the mask and a
   flags clobber, as the x87 patterns expect.  */
13160 set = gen_rtx_fmt_e (code, mode, src);
13161 set = gen_rtx_SET (VOIDmode, dst, set);
13164 use = gen_rtx_USE (VOIDmode, mask);
13165 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13166 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13167 gen_rtvec (3, set, use, clob)));
13174 /* Expand a copysign operation. Special case operand 0 being a constant. */
13177 ix86_expand_copysign (rtx operands[])
13179 enum machine_mode mode;
13180 rtx dest, op0, op1, mask, nmask;
13182 dest = operands[0];
13186 mode = GET_MODE (dest);
/* Constant magnitude: only op1's sign matters, so fold ABS now and
   use the cheaper *_const pattern.  */
13188 if (GET_CODE (op0) == CONST_DOUBLE)
13190 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13192 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13193 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13195 if (mode == SFmode || mode == DFmode)
13197 enum machine_mode vmode;
13199 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13201 if (op0 == CONST0_RTX (mode))
13202 op0 = CONST0_RTX (vmode);
/* Non-zero constant: build a vector with op0 in the low element.  */
13207 if (mode == SFmode)
13208 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13209 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13211 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13213 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13216 else if (op0 != CONST0_RTX (mode))
13217 op0 = force_reg (mode, op0);
13219 mask = ix86_build_signbit_mask (mode, 0, 0);
13221 if (mode == SFmode)
13222 copysign_insn = gen_copysignsf3_const;
13223 else if (mode == DFmode)
13224 copysign_insn = gen_copysigndf3_const;
13226 copysign_insn = gen_copysigntf3_const;
13228 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement.  */
13232 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13234 nmask = ix86_build_signbit_mask (mode, 0, 1);
13235 mask = ix86_build_signbit_mask (mode, 0, 0);
13237 if (mode == SFmode)
13238 copysign_insn = gen_copysignsf3_var;
13239 else if (mode == DFmode)
13240 copysign_insn = gen_copysigndf3_var;
13242 copysign_insn = gen_copysigntf3_var;
13244 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13248 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13249 be a constant, and so has already been expanded into a vector constant. */
13252 ix86_split_copysign_const (rtx operands[])
13254 enum machine_mode mode, vmode;
13255 rtx dest, op0, op1, mask, x;
13257 dest = operands[0];
13260 mask = operands[3];
13262 mode = GET_MODE (dest);
13263 vmode = GET_MODE (mask);
/* dest = (op1 & sign-mask) | op0: keep only op1's sign bit, then OR in
   the (non-negative) constant magnitude.  */
13265 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13266 x = gen_rtx_AND (vmode, dest, mask);
13267 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* OR with zero is a no-op, so skip it for a zero magnitude.  */
13269 if (op0 != CONST0_RTX (vmode))
13271 x = gen_rtx_IOR (vmode, dest, op0);
13272 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13276 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13277 so we have to do two masks. */
13280 ix86_split_copysign_var (rtx operands[])
13282 enum machine_mode mode, vmode;
13283 rtx dest, scratch, op0, op1, mask, nmask, x;
13285 dest = operands[0];
13286 scratch = operands[1];
13289 nmask = operands[4];
13290 mask = operands[5];
13292 mode = GET_MODE (dest);
13293 vmode = GET_MODE (mask);
13295 if (rtx_equal_p (op0, op1))
13297 /* Shouldn't happen often (it's useless, obviously), but when it does
13298 we'd generate incorrect code if we continue below. */
13299 emit_move_insn (dest, op0);
/* The register-allocation alternatives below differ in which operands
   were assigned the same hard register; each path computes
   dest = (op0 & ~signmask) | (op1 & signmask).  */
13303 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13305 gcc_assert (REGNO (op1) == REGNO (scratch));
13307 x = gen_rtx_AND (vmode, scratch, mask);
13308 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest currently holds the mask; ANDN it with op0.  */
13311 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13312 x = gen_rtx_NOT (vmode, dest);
13313 x = gen_rtx_AND (vmode, x, op0);
13314 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13318 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13320 x = gen_rtx_AND (vmode, scratch, mask);
13322 else /* alternative 2,4 */
13324 gcc_assert (REGNO (mask) == REGNO (scratch));
13325 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13326 x = gen_rtx_AND (vmode, scratch, op1);
13328 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13330 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13332 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13333 x = gen_rtx_AND (vmode, dest, nmask);
13335 else /* alternative 3,4 */
13337 gcc_assert (REGNO (nmask) == REGNO (dest));
13339 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13340 x = gen_rtx_AND (vmode, dest, op0);
13342 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine the sign bits (scratch) with the magnitude (dest).  */
13345 x = gen_rtx_IOR (vmode, dest, scratch);
13346 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13349 /* Return TRUE or FALSE depending on whether the first SET in INSN
13350 has source and destination with matching CC modes, and that the
13351 CC mode is at least as constrained as REQ_MODE. */
13354 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13357 enum machine_mode set_mode;
13359 set = PATTERN (insn);
13360 if (GET_CODE (set) == PARALLEL)
13361 set = XVECEXP (set, 0, 0);
13362 gcc_assert (GET_CODE (set) == SET);
13363 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13365 set_mode = GET_MODE (SET_DEST (set));
/* Per-mode compatibility checks follow; the switch/case structure is
   elided from this listing.  CCmode with a non-zero second compare
   operand is only acceptable when CCmode itself was requested.  */
13369 if (req_mode != CCNOmode
13370 && (req_mode != CCmode
13371 || XEXP (SET_SRC (set), 1) != const0_rtx))
13375 if (req_mode == CCGCmode)
13379 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13383 if (req_mode == CCZmode)
13394 gcc_unreachable ();
13397 return (GET_MODE (SET_SRC (set)) == set_mode);
13400 /* Generate insn patterns to do an integer compare of OPERANDS. */
13403 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13405 enum machine_mode cmpmode;
/* Pick the least-constrained CC mode that still carries the flags the
   comparison CODE needs, then emit flags = COMPARE (op0, op1).  */
13408 cmpmode = SELECT_CC_MODE (code, op0, op1);
13409 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13411 /* This is very simple, but making the interface the same as in the
13412 FP case makes the rest of the code easier. */
13413 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13414 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13416 /* Return the test that should be put into the flags user, i.e.
13417 the bcc, scc, or cmov instruction. */
13418 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13421 /* Figure out whether to use ordered or unordered fp comparisons.
13422 Return the appropriate mode to use. */
13425 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13427 /* ??? In order to make all comparisons reversible, we do all comparisons
13428 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13429 all forms trapping and nontrapping comparisons, we can make inequality
13430 comparisons trapping again, since it results in better code when using
13431 FCOM based compares. */
13432 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode for a comparison CODE of OP0 with OP1: the
   least-constrained condition-code mode whose flags suffice for CODE.
   NOTE(review): the returned mode for each case is on elided lines;
   the comments on each case label describe which flags are needed.  */
13436 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13438 enum machine_mode mode = GET_MODE (op0);
13440 if (SCALAR_FLOAT_MODE_P (mode))
13442 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13443 return ix86_fp_compare_mode (code);
13448 /* Only zero flag is needed. */
13449 case EQ: /* ZF=0 */
13450 case NE: /* ZF!=0 */
13452 /* Codes needing carry flag. */
13453 case GEU: /* CF=0 */
13454 case LTU: /* CF=1 */
13455 /* Detect overflow checks. They need just the carry flag. */
13456 if (GET_CODE (op0) == PLUS
13457 && rtx_equal_p (op1, XEXP (op0, 0)))
13461 case GTU: /* CF=0 & ZF=0 */
13462 case LEU: /* CF=1 | ZF=1 */
13463 /* Detect overflow checks. They need just the carry flag. */
13464 if (GET_CODE (op0) == MINUS
13465 && rtx_equal_p (op1, XEXP (op0, 0)))
13469 /* Codes possibly doable only with sign flag when
13470 comparing against zero. */
13471 case GE: /* SF=OF or SF=0 */
13472 case LT: /* SF<>OF or SF=1 */
13473 if (op1 == const0_rtx)
13476 /* For other cases Carry flag is not required. */
13478 /* Codes doable only with sign flag when comparing
13479 against zero, but we miss jump instruction for it
13480 so we need to use relational tests against overflow
13481 that thus needs to be zero. */
13482 case GT: /* ZF=0 & SF=OF */
13483 case LE: /* ZF=1 | SF<>OF */
13484 if (op1 == const0_rtx)
13488 /* strcmp pattern do (use flags) and combine may ask us for proper
13493 gcc_unreachable ();
13497 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body is elided here; presumably it stores the flags
   register numbers through *p1/*p2 -- confirm against the full file.  */
13500 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13507 /* If two condition code modes are compatible, return a condition code
13508 mode which is compatible with both. Otherwise, return
13511 static enum machine_mode
13512 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Identical modes and non-CC modes are handled first (equality branch
   elided from this listing).  */
13517 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC differ only in whether OF/CF are usable; their
   common compatible mode is returned on an elided line.  */
13520 if ((m1 == CCGCmode && m2 == CCGOCmode)
13521 || (m1 == CCGOCmode && m2 == CCGCmode))
13527 gcc_unreachable ();
13557 /* These are only compatible with themselves, which we already
13563 /* Split comparison code CODE into comparisons we can do using branch
13564 instructions. BYPASS_CODE is comparison code for branch that will
13565 branch around FIRST_CODE and SECOND_CODE. If some of branches
13566 is not required, set value to UNKNOWN.
13567 We never require more than two branches. */
13570 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13571 enum rtx_code *first_code,
13572 enum rtx_code *second_code)
13574 *first_code = code;
13575 *bypass_code = UNKNOWN;
13576 *second_code = UNKNOWN;
13578 /* The fcomi comparison sets flags as follows:
/* These codes map directly onto a single flags test after fcomi.  */
13588 case GT: /* GTU - CF=0 & ZF=0 */
13589 case GE: /* GEU - CF=0 */
13590 case ORDERED: /* PF=0 */
13591 case UNORDERED: /* PF=1 */
13592 case UNEQ: /* EQ - ZF=1 */
13593 case UNLT: /* LTU - CF=1 */
13594 case UNLE: /* LEU - CF=1 | ZF=1 */
13595 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that the flags can't express directly: test the
   unordered variant and branch around it when PF says unordered.  */
13597 case LT: /* LTU - CF=1 - fails on unordered */
13598 *first_code = UNLT;
13599 *bypass_code = UNORDERED;
13601 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13602 *first_code = UNLE;
13603 *bypass_code = UNORDERED;
13605 case EQ: /* EQ - ZF=1 - fails on unordered */
13606 *first_code = UNEQ;
13607 *bypass_code = UNORDERED;
/* Codes needing a second branch that also takes the unordered case.  */
13609 case NE: /* NE - ZF=0 - fails on unordered */
13610 *first_code = LTGT;
13611 *second_code = UNORDERED;
13613 case UNGE: /* GEU - CF=0 - fails on unordered */
13615 *second_code = UNORDERED;
13617 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13619 *second_code = UNORDERED;
13622 gcc_unreachable ();
/* Without strict IEEE, unordered results can't occur, so drop the
   extra branches.  */
13624 if (!TARGET_IEEE_FP)
13626 *second_code = UNKNOWN;
13627 *bypass_code = UNKNOWN;
13631 /* Return cost of comparison done fcom + arithmetics operations on AX.
13632 All following functions do use number of instructions as a cost metrics.
13633 In future this should be tweaked to compute bytes for optimize_size and
13634 take into account performance of various instructions on various CPUs. */
13636 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Without IEEE FP the cheap path applies (cost returned on an elided
   line); otherwise cost depends on CODE via an elided switch.  */
13638 if (!TARGET_IEEE_FP)
13640 /* The cost of code output by ix86_expand_fp_compare. */
13664 gcc_unreachable ();
13668 /* Return cost of comparison done using fcomi operation.
13669 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13671 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13673 enum rtx_code bypass_code, first_code, second_code;
13674 /* Return arbitrarily high cost when instruction is not supported - this
13675 prevents gcc from using it. */
/* Cost = fcomi (2) + one extra branch if a bypass or second test is
   needed.  (The !TARGET_CMOVE guard is elided.)  */
13678 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13679 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13682 /* Return cost of comparison done using sahf operation.
13683 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13685 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13687 enum rtx_code bypass_code, first_code, second_code;
13688 /* Return arbitrarily high cost when instruction is not preferred - this
13689 avoids gcc from using it. */
13690 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
/* Cost = fnstsw + sahf + test (3) plus one extra branch if needed.  */
13692 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13693 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13696 /* Compute cost of the comparison done using any method.
13697 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13699 ix86_fp_comparison_cost (enum rtx_code code)
13701 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
/* Take the minimum of the three strategies' costs.  */
13704 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13705 sahf_cost = ix86_fp_comparison_sahf_cost (code);
13707 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13708 if (min > sahf_cost)
13710 if (min > fcomi_cost)
/* NOTE(review): the min assignments and final return are elided.  */
13715 /* Return true if we should use an FCOMI instruction for this
13719 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13721 enum rtx_code swapped_code = swap_condition (code);
/* fcomi is chosen when it is the cheapest strategy for either operand
   order -- the caller may be free to swap the comparison.  */
13723 return ((ix86_fp_comparison_cost (code)
13724 == ix86_fp_comparison_fcomi_cost (code))
13725 || (ix86_fp_comparison_cost (swapped_code)
13726 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13729 /* Swap, force into registers, or otherwise massage the two operands
13730 to a fp comparison. The operands are updated in place; the new
13731 comparison code is returned. */
/* NOTE(review): sampled listing -- braces, `else` arms, and several
   conditions (e.g. the full `if` at original 13745-13747 guarding the
   force-to-register path, and the switch on standard_80387_constant_p
   around 13780-13787) are elided; the visible lines show only the
   skeleton of the operand massaging.  */
13733 static enum rtx_code
13734 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13736 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13737 rtx op0 = *pop0, op1 = *pop1;
13738 enum machine_mode op_mode = GET_MODE (op0);
/* SSE scalar FP comparisons take a different path than x87 below.  */
13739 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13741 /* All of the unordered compare instructions only work on registers.
13742 The same is true of the fcomi compare instructions. The XFmode
13743 compare instructions require registers except when comparing
13744 against zero or when converting operand 1 from fixed point to
13748 && (fpcmp_mode == CCFPUmode
13749 || (op_mode == XFmode
13750 && ! (standard_80387_constant_p (op0) == 1
13751 || standard_80387_constant_p (op1) == 1)
13752 && GET_CODE (op1) != FLOAT)
13753 || ix86_use_fcomi_compare (code)))
13755 op0 = force_reg (op_mode, op0);
13756 op1 = force_reg (op_mode, op1);
13760 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13761 things around if they appear profitable, otherwise force op0
13762 into a register. */
13764 if (standard_80387_constant_p (op0) == 0
13766 && ! (standard_80387_constant_p (op1) == 0
13770 tmp = op0, op0 = op1, op1 = tmp;
/* Operand swap must be mirrored in the comparison code.  */
13771 code = swap_condition (code);
13775 op0 = force_reg (op_mode, op0);
13777 if (CONSTANT_P (op1))
13779 int tmp = standard_80387_constant_p (op1);
/* Constants the FPU cannot load directly go to the constant pool.  */
13781 op1 = validize_mem (force_const_mem (op_mode, op1));
13785 op1 = force_reg (op_mode, op1);
13788 op1 = force_reg (op_mode, op1);
13792 /* Try to rearrange the comparison to make it cheaper. */
13793 if (ix86_fp_comparison_cost (code)
13794 > ix86_fp_comparison_cost (swap_condition (code))
13795 && (REG_P (op1) || can_create_pseudo_p ()))
13798 tmp = op0, op0 = op1, op1 = tmp;
13799 code = swap_condition (code);
13801 op0 = force_reg (op_mode, op0);
13809 /* Convert comparison codes we use to represent FP comparison to integer
13810 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): sampled listing -- the entire body (original
   13815-13842, presumably a switch mapping GT/LT/... to unsigned
   integer codes) is elided; only the signature survives here.  */
13814 ix86_fp_compare_code_to_integer (enum rtx_code code)
13843 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the comparison insns and returns the flags-user test rtx.
   *SECOND_TEST / *BYPASS_TEST receive extra tests when one jump is not
   enough.  NOTE(review): sampled listing -- braces, `else` branches,
   the switch labels between the TARGET_IEEE_FP cases, and several
   closing constructs are elided throughout this block.  */
13846 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13847 rtx *second_test, rtx *bypass_test)
13849 enum machine_mode fpcmp_mode, intcmp_mode;
13851 int cost = ix86_fp_comparison_cost (code);
13852 enum rtx_code bypass_code, first_code, second_code;
13854 fpcmp_mode = ix86_fp_compare_mode (code);
13855 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13858 *second_test = NULL_RTX;
13860 *bypass_test = NULL_RTX;
13862 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13864 /* Do fcomi/sahf based test when profitable. */
13865 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13866 && (bypass_code == UNKNOWN || bypass_test)
13867 && (second_code == UNKNOWN || second_test))
13869 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13870 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw result goes through a scratch HImode register.  */
13876 gcc_assert (TARGET_SAHF);
13879 scratch = gen_reg_rtx (HImode);
13880 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13882 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13885 /* The FP codes work out to act like unsigned. */
13886 intcmp_mode = fpcmp_mode;
13888 if (bypass_code != UNKNOWN)
13889 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13890 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13892 if (second_code != UNKNOWN)
13893 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13894 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13899 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13900 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13901 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13903 scratch = gen_reg_rtx (HImode);
13904 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13906 /* In the unordered case, we have to check C2 for NaN's, which
13907 doesn't happen to work out to anything nice combination-wise.
13908 So do some bit twiddling on the value we've got in AH to come
13909 up with an appropriate set of condition codes. */
/* Masks below address the FPU status word as stored in AH by fnstsw:
   0x01 = C0, 0x04 = C2, 0x40 = C3, 0x45 = C0|C2|C3 (per FNSTSW layout
   in the Intel SDM) -- NOTE(review): confirm against that reference.  */
13911 intcmp_mode = CCNOmode;
13916 if (code == GT || !TARGET_IEEE_FP)
13918 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13923 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13924 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13925 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13926 intcmp_mode = CCmode;
13932 if (code == LT && TARGET_IEEE_FP)
13934 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13935 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13936 intcmp_mode = CCmode;
13941 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13947 if (code == GE || !TARGET_IEEE_FP)
13949 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13954 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13955 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13962 if (code == LE && TARGET_IEEE_FP)
13964 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13965 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13966 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13967 intcmp_mode = CCmode;
13972 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13978 if (code == EQ && TARGET_IEEE_FP)
13980 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13981 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13982 intcmp_mode = CCmode;
13987 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13994 if (code == NE && TARGET_IEEE_FP)
13996 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13997 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14003 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14009 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14013 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14018 gcc_unreachable ();
14022 /* Return the test that should be put into the flags user, i.e.
14023 the bcc, scc, or cmov instruction. */
14024 return gen_rtx_fmt_ee (code, VOIDmode,
14025 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1 (or reuse an
   already-emitted flags result in ix86_compare_emitted) and return the
   test rtx.  NOTE(review): sampled listing -- return type, braces, the
   declarations of op0/op1/ret, and the final return are elided.  */
14030 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14033 op0 = ix86_compare_op0;
14034 op1 = ix86_compare_op1;
14037 *second_test = NULL_RTX;
14039 *bypass_test = NULL_RTX;
/* A previously emitted compare short-circuits everything: just wrap
   the stored flags register in the requested condition.  */
14041 if (ix86_compare_emitted)
14043 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
14044 ix86_compare_emitted = NULL_RTX;
14046 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14048 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14049 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14050 second_test, bypass_test);
14053 ret = ix86_expand_int_compare (code, op0, op1);
14058 /* Return true if the CODE will result in nontrivial jump sequence. */
/* NOTE(review): sampled listing -- return type, braces, and an elided
   early test (original 14063-14064) are missing.  Nontrivial means a
   bypass or second jump would be required.  */
14060 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14062 enum rtx_code bypass_code, first_code, second_code;
14065 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14066 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on CODE (comparing the global
   ix86_compare_op0/op1) to LABEL.  Dispatches on the comparison mode:
   simple integer modes, x87/SSE float modes, and double-word
   (DImode/TImode) which is split recursively into word compares.
   NOTE(review): sampled listing -- the switch case labels, braces,
   `else` arms, `break`s and `return`s between the visible lines are
   elided; do not assume adjacency of consecutive lines below.  */
14070 ix86_expand_branch (enum rtx_code code, rtx label)
14074 /* If we have emitted a compare insn, go straight to simple.
14075 ix86_expand_compare won't emit anything if ix86_compare_emitted
14077 if (ix86_compare_emitted)
14080 switch (GET_MODE (ix86_compare_op0))
/* --- simple integer-mode case: one compare + one jump --- */
14086 tmp = ix86_expand_compare (code, NULL, NULL);
14087 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14088 gen_rtx_LABEL_REF (VOIDmode, label),
14090 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* --- floating-point case --- */
14099 enum rtx_code bypass_code, first_code, second_code;
14101 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14102 &ix86_compare_op1);
14104 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14106 /* Check whether we will use the natural sequence with one jump. If
14107 so, we can expand jump early. Otherwise delay expansion by
14108 creating compound insn to not confuse optimizers. */
14109 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14111 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14112 gen_rtx_LABEL_REF (VOIDmode, label),
14113 pc_rtx, NULL_RTX, NULL_RTX);
14117 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14118 ix86_compare_op0, ix86_compare_op1);
14119 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14120 gen_rtx_LABEL_REF (VOIDmode, label),
14122 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
/* Compound jump pattern clobbers FPSR/FLAGS, plus an HImode scratch
   when fcomi is not used (the fnstsw path needs it).  */
14124 use_fcomi = ix86_use_fcomi_compare (code);
14125 vec = rtvec_alloc (3 + !use_fcomi);
14126 RTVEC_ELT (vec, 0) = tmp;
14128 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14130 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14133 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14135 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14144 /* Expand DImode branch into multiple compare+branch. */
14146 rtx lo[2], hi[2], label2;
14147 enum rtx_code code1, code2, code3;
14148 enum machine_mode submode;
/* Canonicalize: constant goes second.  */
14150 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14152 tmp = ix86_compare_op0;
14153 ix86_compare_op0 = ix86_compare_op1;
14154 ix86_compare_op1 = tmp;
14155 code = swap_condition (code);
14157 if (GET_MODE (ix86_compare_op0) == DImode)
14159 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14160 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14165 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14166 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14170 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14171 avoid two branches. This costs one extra insn, so disable when
14172 optimizing for size. */
14174 if ((code == EQ || code == NE)
14175 && (!optimize_insn_for_size_p ()
14176 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14181 if (hi[1] != const0_rtx)
14182 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14183 NULL_RTX, 0, OPTAB_WIDEN)
14186 if (lo[1] != const0_rtx)
14187 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14188 NULL_RTX, 0, OPTAB_WIDEN);
14190 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14191 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse with the OR-of-XORs against zero -- hits the word-mode case.  */
14193 ix86_compare_op0 = tmp;
14194 ix86_compare_op1 = const0_rtx;
14195 ix86_expand_branch (code, label);
14199 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14200 op1 is a constant and the low word is zero, then we can just
14201 examine the high word. Similarly for low word -1 and
14202 less-or-equal-than or greater-than. */
14204 if (CONST_INT_P (hi[1]))
14207 case LT: case LTU: case GE: case GEU:
14208 if (lo[1] == const0_rtx)
14210 ix86_compare_op0 = hi[0];
14211 ix86_compare_op1 = hi[1];
14212 ix86_expand_branch (code, label);
14216 case LE: case LEU: case GT: case GTU:
14217 if (lo[1] == constm1_rtx)
14219 ix86_compare_op0 = hi[0];
14220 ix86_compare_op1 = hi[1];
14221 ix86_expand_branch (code, label);
14229 /* Otherwise, we need two or three jumps. */
14231 label2 = gen_label_rtx ();
14234 code2 = swap_condition (code);
14235 code3 = unsigned_condition (code);
14239 case LT: case GT: case LTU: case GTU:
14242 case LE: code1 = LT; code2 = GT; break;
14243 case GE: code1 = GT; code2 = LT; break;
14244 case LEU: code1 = LTU; code2 = GTU; break;
14245 case GEU: code1 = GTU; code2 = LTU; break;
14247 case EQ: code1 = UNKNOWN; code2 = NE; break;
14248 case NE: code2 = UNKNOWN; break;
14251 gcc_unreachable ();
14256 * if (hi(a) < hi(b)) goto true;
14257 * if (hi(a) > hi(b)) goto false;
14258 * if (lo(a) < lo(b)) goto true;
14262 ix86_compare_op0 = hi[0];
14263 ix86_compare_op1 = hi[1];
14265 if (code1 != UNKNOWN)
14266 ix86_expand_branch (code1, label);
14267 if (code2 != UNKNOWN)
14268 ix86_expand_branch (code2, label2);
/* Low words compared unsigned regardless of original signedness.  */
14270 ix86_compare_op0 = lo[0];
14271 ix86_compare_op1 = lo[1];
14272 ix86_expand_branch (code3, label);
14274 if (code2 != UNKNOWN)
14275 emit_label (label2);
14280 gcc_unreachable ();
14284 /* Split branch based on floating point condition. */
/* Emits up to three conditional jumps (bypass for unordered, the main
   condition, and a second test), attaching REG_BR_PROB notes when
   split_branch_probability is known.  PUSHED, when non-null, is an
   operand that was pushed to memory and must be freed afterwards.
   NOTE(review): sampled listing -- braces, `else` arms, and parts of
   the gen_rtx_SET argument lists are elided below.  */
14286 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14287 rtx target1, rtx target2, rtx tmp, rtx pushed)
14289 rtx second, bypass;
14290 rtx label = NULL_RTX;
14292 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through edge is target2 (== pc_rtx).  */
14295 if (target2 != pc_rtx)
14298 code = reverse_condition_maybe_unordered (code);
14303 condition = ix86_expand_fp_compare (code, op1, op2,
14304 tmp, &second, &bypass);
14306 /* Remove pushed operand from stack. */
14308 ix86_free_from_memory (GET_MODE (pushed));
14310 if (split_branch_probability >= 0)
14312 /* Distribute the probabilities across the jumps.
14313 Assume the BYPASS and SECOND to be always test
14315 probability = split_branch_probability;
14317 /* Value of 1 is low enough to make no need for probability
14318 to be updated. Later we may run some experiments and see
14319 if unordered values are more frequent in practice. */
14321 bypass_probability = 1;
14323 second_probability = 1;
14325 if (bypass != NULL_RTX)
14327 label = gen_label_rtx ();
14328 i = emit_jump_insn (gen_rtx_SET
14330 gen_rtx_IF_THEN_ELSE (VOIDmode,
14332 gen_rtx_LABEL_REF (VOIDmode,
14335 if (bypass_probability >= 0)
14337 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14338 GEN_INT (bypass_probability),
14341 i = emit_jump_insn (gen_rtx_SET
14343 gen_rtx_IF_THEN_ELSE (VOIDmode,
14344 condition, target1, target2)));
14345 if (probability >= 0)
14347 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14348 GEN_INT (probability),
14350 if (second != NULL_RTX)
14352 i = emit_jump_insn (gen_rtx_SET
14354 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14356 if (second_probability >= 0)
14358 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14359 GEN_INT (second_probability),
14362 if (label != NULL_RTX)
14363 emit_label (label);
/* Expand a setcc of the global comparison into DEST (must be QImode).
   Returns 1 on success (DONE), 0 for FAIL so the caller falls back.
   When the FP compare needs a second/bypass test, two setcc results
   are combined with and/or.  NOTE(review): sampled listing -- return
   type, braces, the tmp/tmpreg setup around original 14379-14382, and
   the and/or selection logic are partially elided.  */
14367 ix86_expand_setcc (enum rtx_code code, rtx dest)
14369 rtx ret, tmp, tmpreg, equiv;
14370 rtx second_test, bypass_test;
/* Double-word compares are handled elsewhere.  */
14372 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14373 return 0; /* FAIL */
14375 gcc_assert (GET_MODE (dest) == QImode);
14377 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14378 PUT_MODE (ret, QImode);
14383 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14384 if (bypass_test || second_test)
14386 rtx test = second_test;
14388 rtx tmp2 = gen_reg_rtx (QImode);
14391 gcc_assert (!second_test);
14392 test = bypass_test;
/* Bypass test is combined inverted (and-path), per the PUT_CODE below.  */
14394 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14396 PUT_MODE (test, QImode);
14397 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14400 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14402 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14405 /* Attach a REG_EQUAL note describing the comparison result. */
14406 if (ix86_compare_op0 && ix86_compare_op1)
14408 equiv = simplify_gen_relational (code, QImode,
14409 GET_MODE (ix86_compare_op0),
14410 ix86_compare_op0, ix86_compare_op1);
14411 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14414 return 1; /* DONE */
14417 /* Expand comparison setting or clearing carry flag. Return true when
14418 successful and set pop for the operation. */
/* NOTE(review): sampled listing -- return type, braces, `return false`
   statements after most guards, and the switch labels over CODE in the
   integer half are elided.  The integer half rewrites EQ/GT/GTU/LT/LE
   etc. into LTU/GEU forms so only the carry flag is consulted.  */
14420 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14422 enum machine_mode mode =
14423 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14425 /* Do not handle DImode compares that go through special path. */
14426 if (mode == (TARGET_64BIT ? TImode : DImode))
14429 if (SCALAR_FLOAT_MODE_P (mode))
14431 rtx second_test = NULL, bypass_test = NULL;
14432 rtx compare_op, compare_seq;
14434 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14436 /* Shortcut: following common codes never translate
14437 into carry flag compares. */
14438 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14439 || code == ORDERED || code == UNORDERED)
14442 /* These comparisons require zero flag; swap operands so they won't. */
14443 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14444 && !TARGET_IEEE_FP)
14449 code = swap_condition (code);
14452 /* Try to expand the comparison and verify that we end up with
14453 carry flag based comparison. This fails to be true only when
14454 we decide to expand comparison using arithmetic that is not
14455 too common scenario. */
14457 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14458 &second_test, &bypass_test);
14459 compare_seq = get_insns ();
14462 if (second_test || bypass_test)
14465 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14466 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14467 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14469 code = GET_CODE (compare_op);
14471 if (code != LTU && code != GEU)
14474 emit_insn (compare_seq);
14479 if (!INTEGRAL_MODE_P (mode))
14488 /* Convert a==0 into (unsigned)a<1. */
14491 if (op1 != const0_rtx)
14494 code = (code == EQ ? LTU : GEU);
14497 /* Convert a>b into b<a or a>=b-1. */
14500 if (CONST_INT_P (op1))
14502 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14503 /* Bail out on overflow. We still can swap operands but that
14504 would force loading of the constant into register. */
14505 if (op1 == const0_rtx
14506 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14508 code = (code == GTU ? GEU : LTU);
14515 code = (code == GTU ? LTU : GEU);
14519 /* Convert a>=0 into (unsigned)a<0x80000000. */
14522 if (mode == DImode || op1 != const0_rtx)
/* NOTE(review): `1 << (GET_MODE_BITSIZE (mode) - 1)` is a plain-int
   shift; for a 32-bit mode this is 1 << 31, which overflows signed int
   (UB) on hosts with 32-bit int -- consider (HOST_WIDE_INT) 1 << ...  */
14524 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14525 code = (code == LT ? GEU : LTU);
14529 if (mode == DImode || op1 != constm1_rtx)
14531 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14532 code = (code == LE ? GEU : LTU);
14538 /* Swapping operands may cause constant to appear as first operand. */
14539 if (!nonimmediate_operand (op0, VOIDmode))
14541 if (!can_create_pseudo_p ())
14543 op0 = force_reg (mode, op0);
14545 ix86_compare_op0 = op0;
14546 ix86_compare_op1 = op1;
14547 *pop = ix86_expand_compare (code, NULL, NULL);
14548 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move operands[0] = operands[1] ?
   operands[2] : operands[3].  Returns 1 (DONE) or 0 (FAIL, caller
   falls back to a jump sequence).  Strategies, in order: sbb/setcc
   arithmetic for two-constant cases, lea-based scaling, setcc+and+add
   general constant case, constant-plus-variable masking, and finally a
   real cmov.  NOTE(review): sampled listing -- many braces, `else`
   arms, `diff` computations (e.g. original 14584-14585), and whole
   sub-branches are elided; consecutive lines below are often not
   adjacent in the real file.  */
14553 ix86_expand_int_movcc (rtx operands[])
14555 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14556 rtx compare_seq, compare_op;
14557 rtx second_test, bypass_test;
14558 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray second semicolon below -- harmless but should
   be `= false;`.  */
14559 bool sign_bit_compare_p = false;;
14562 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14563 compare_seq = get_insns ();
14566 compare_code = GET_CODE (compare_op);
/* x >= 0 / x < 0 (and x > -1 / x <= -1) test only the sign bit.  */
14568 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14569 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14570 sign_bit_compare_p = true;
14572 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14573 HImode insns, we'd be swallowed in word prefix ops. */
14575 if ((mode != HImode || TARGET_FAST_PREFIX)
14576 && (mode != (TARGET_64BIT ? TImode : DImode))
14577 && CONST_INT_P (operands[2])
14578 && CONST_INT_P (operands[3]))
14580 rtx out = operands[0];
14581 HOST_WIDE_INT ct = INTVAL (operands[2]);
14582 HOST_WIDE_INT cf = INTVAL (operands[3]);
14583 HOST_WIDE_INT diff;
14586 /* Sign bit compares are better done using shifts than we do by using
14588 if (sign_bit_compare_p
14589 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14590 ix86_compare_op1, &compare_op))
14592 /* Detect overlap between destination and compare sources. */
14595 if (!sign_bit_compare_p)
14597 bool fpcmp = false;
14599 compare_code = GET_CODE (compare_op);
14601 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14602 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14605 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14608 /* To simplify rest of code, restrict to the GEU case. */
14609 if (compare_code == LTU)
14611 HOST_WIDE_INT tmp = ct;
14614 compare_code = reverse_condition (compare_code);
14615 code = reverse_condition (code);
14620 PUT_CODE (compare_op,
14621 reverse_condition_maybe_unordered
14622 (GET_CODE (compare_op)));
14624 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14628 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14629 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14630 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb idiom).  */
14632 if (mode == DImode)
14633 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14635 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14639 if (code == GT || code == GE)
14640 code = reverse_condition (code);
14643 HOST_WIDE_INT tmp = ct;
14648 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14649 ix86_compare_op1, VOIDmode, 0, -1);
14662 tmp = expand_simple_binop (mode, PLUS,
14664 copy_rtx (tmp), 1, OPTAB_DIRECT);
14675 tmp = expand_simple_binop (mode, IOR,
14677 copy_rtx (tmp), 1, OPTAB_DIRECT);
14679 else if (diff == -1 && ct)
14689 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14691 tmp = expand_simple_binop (mode, PLUS,
14692 copy_rtx (tmp), GEN_INT (cf),
14693 copy_rtx (tmp), 1, OPTAB_DIRECT);
14701 * andl cf - ct, dest
14711 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14714 tmp = expand_simple_binop (mode, AND,
14716 gen_int_mode (cf - ct, mode),
14717 copy_rtx (tmp), 1, OPTAB_DIRECT);
14719 tmp = expand_simple_binop (mode, PLUS,
14720 copy_rtx (tmp), GEN_INT (ct),
14721 copy_rtx (tmp), 1, OPTAB_DIRECT);
14724 if (!rtx_equal_p (tmp, out))
14725 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14727 return 1; /* DONE */
14732 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14735 tmp = ct, ct = cf, cf = tmp;
14738 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14740 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14742 /* We may be reversing unordered compare to normal compare, that
14743 is not valid in general (we may convert non-trapping condition
14744 to trapping one), however on i386 we currently emit all
14745 comparisons unordered. */
14746 compare_code = reverse_condition_maybe_unordered (compare_code);
14747 code = reverse_condition_maybe_unordered (code);
14751 compare_code = reverse_condition (compare_code);
14752 code = reverse_condition (code);
14756 compare_code = UNKNOWN;
14757 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14758 && CONST_INT_P (ix86_compare_op1))
14760 if (ix86_compare_op1 == const0_rtx
14761 && (code == LT || code == GE))
14762 compare_code = code;
14763 else if (ix86_compare_op1 == constm1_rtx)
14767 else if (code == GT)
14772 /* Optimize dest = (op0 < 0) ? -1 : cf. */
14773 if (compare_code != UNKNOWN
14774 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14775 && (cf == -1 || ct == -1))
14777 /* If lea code below could be used, only optimize
14778 if it results in a 2 insn sequence. */
14780 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14781 || diff == 3 || diff == 5 || diff == 9)
14782 || (compare_code == LT && ct == -1)
14783 || (compare_code == GE && cf == -1))
14786 * notl op1 (if necessary)
14794 code = reverse_condition (code);
14797 out = emit_store_flag (out, code, ix86_compare_op0,
14798 ix86_compare_op1, VOIDmode, 0, -1);
14800 out = expand_simple_binop (mode, IOR,
14802 out, 1, OPTAB_DIRECT);
14803 if (out != operands[0])
14804 emit_move_insn (operands[0], out);
14806 return 1; /* DONE */
/* lea path: diff must be representable as a lea scale/index form.  */
14811 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14812 || diff == 3 || diff == 5 || diff == 9)
14813 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14815 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14821 * lea cf(dest*(ct-cf)),dest
14825 * This also catches the degenerate setcc-only case.
14831 out = emit_store_flag (out, code, ix86_compare_op0,
14832 ix86_compare_op1, VOIDmode, 0, 1);
14835 /* On x86_64 the lea instruction operates on Pmode, so we need
14836 to get arithmetics done in proper mode to match. */
14838 tmp = copy_rtx (out);
14842 out1 = copy_rtx (out);
14843 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14847 tmp = gen_rtx_PLUS (mode, tmp, out1);
14853 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14856 if (!rtx_equal_p (tmp, out))
14859 out = force_operand (tmp, copy_rtx (out));
14861 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14863 if (!rtx_equal_p (out, operands[0]))
14864 emit_move_insn (operands[0], copy_rtx (out));
14866 return 1; /* DONE */
14870 * General case: Jumpful:
14871 * xorl dest,dest cmpl op1, op2
14872 * cmpl op1, op2 movl ct, dest
14873 * setcc dest jcc 1f
14874 * decl dest movl cf, dest
14875 * andl (cf-ct),dest 1:
14878 * Size 20. Size 14.
14880 * This is reasonably steep, but branch mispredict costs are
14881 * high on modern cpus, so consider failing only if optimizing
14885 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14886 && BRANCH_COST (optimize_insn_for_speed_p (),
14891 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14896 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14898 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14900 /* We may be reversing unordered compare to normal compare,
14901 that is not valid in general (we may convert non-trapping
14902 condition to trapping one), however on i386 we currently
14903 emit all comparisons unordered. */
14904 code = reverse_condition_maybe_unordered (code);
14908 code = reverse_condition (code);
14909 if (compare_code != UNKNOWN)
14910 compare_code = reverse_condition (compare_code);
14914 if (compare_code != UNKNOWN)
14916 /* notl op1 (if needed)
14921 For x < 0 (resp. x <= -1) there will be no notl,
14922 so if possible swap the constants to get rid of the
14924 True/false will be -1/0 while code below (store flag
14925 followed by decrement) is 0/-1, so the constants need
14926 to be exchanged once more. */
14928 if (compare_code == GE || !cf)
14930 code = reverse_condition (code);
14935 HOST_WIDE_INT tmp = cf;
14940 out = emit_store_flag (out, code, ix86_compare_op0,
14941 ix86_compare_op1, VOIDmode, 0, -1);
14945 out = emit_store_flag (out, code, ix86_compare_op0,
14946 ix86_compare_op1, VOIDmode, 0, 1);
14948 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14949 copy_rtx (out), 1, OPTAB_DIRECT);
14952 out = expand_simple_binop (mode, AND, copy_rtx (out),
14953 gen_int_mode (cf - ct, mode),
14954 copy_rtx (out), 1, OPTAB_DIRECT);
14956 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14957 copy_rtx (out), 1, OPTAB_DIRECT);
14958 if (!rtx_equal_p (out, operands[0]))
14959 emit_move_insn (operands[0], copy_rtx (out));
14961 return 1; /* DONE */
14965 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14967 /* Try a few things more with specific constants and a variable. */
14970 rtx var, orig_out, out, tmp;
14972 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
14973 return 0; /* FAIL */
14975 /* If one of the two operands is an interesting constant, load a
14976 constant with the above and mask it in with a logical operation. */
14978 if (CONST_INT_P (operands[2]))
14981 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14982 operands[3] = constm1_rtx, op = and_optab;
14983 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14984 operands[3] = const0_rtx, op = ior_optab;
14986 return 0; /* FAIL */
14988 else if (CONST_INT_P (operands[3]))
14991 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14992 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): `operands[3] != const0_rtx` is trivially true here
   (INTVAL (operands[3]) == -1) and is asymmetric with the mirror
   branch at 14983, which checks the OTHER operand; this looks like it
   was meant to be `operands[2] != const0_rtx` -- verify upstream.  */
14993 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
14994 operands[2] = const0_rtx, op = ior_optab;
14996 return 0; /* FAIL */
14999 return 0; /* FAIL */
15001 orig_out = operands[0];
15002 tmp = gen_reg_rtx (mode);
15005 /* Recurse to get the constant loaded. */
15006 if (ix86_expand_int_movcc (operands) == 0)
15007 return 0; /* FAIL */
15009 /* Mask in the interesting variable. */
15010 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15012 if (!rtx_equal_p (out, orig_out))
15013 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15015 return 1; /* DONE */
15019 * For comparison with above,
/* Final path: a real cmov.  cmov cannot take immediates, and only one
   memory operand may survive; force operands to registers as needed.  */
15029 if (! nonimmediate_operand (operands[2], mode))
15030 operands[2] = force_reg (mode, operands[2]);
15031 if (! nonimmediate_operand (operands[3], mode))
15032 operands[3] = force_reg (mode, operands[3]);
15034 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15036 rtx tmp = gen_reg_rtx (mode);
15037 emit_move_insn (tmp, operands[3]);
15040 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15042 rtx tmp = gen_reg_rtx (mode);
15043 emit_move_insn (tmp, operands[2]);
15047 if (! register_operand (operands[2], VOIDmode)
15049 || ! register_operand (operands[3], VOIDmode)))
15050 operands[2] = force_reg (mode, operands[2]);
15053 && ! register_operand (operands[3], VOIDmode))
15054 operands[3] = force_reg (mode, operands[3]);
15056 emit_insn (compare_seq);
15057 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15058 gen_rtx_IF_THEN_ELSE (mode,
15059 compare_op, operands[2],
/* Extra cmovs merge in the bypass/second test results.  */
15062 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15063 gen_rtx_IF_THEN_ELSE (mode,
15065 copy_rtx (operands[3]),
15066 copy_rtx (operands[0]))));
15068 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15069 gen_rtx_IF_THEN_ELSE (mode,
15071 copy_rtx (operands[2]),
15072 copy_rtx (operands[0]))));
15074 return 1; /* DONE */
15077 /* Swap, force into registers, or otherwise massage the two operands
15078 to an sse comparison with a mask result. Thus we differ a bit from
15079 ix86_prepare_fp_compare_args which expects to produce a flags result.
15081 The DEST operand exists to help determine whether to commute commutative
15082 operators. The POP0/POP1 operands are updated in place. The new
15083 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): sampled listing -- the switch over CODE, its case
   labels, the operand swap for the commutative canonicalization, and
   the final return are elided; only skeleton lines are visible.  */
15085 static enum rtx_code
15086 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15087 rtx *pop0, rtx *pop1)
15095 /* We have no LTGT as an operator. We could implement it with
15096 NE & ORDERED, but this requires an extra temporary. It's
15097 not clear that it's worth it. */
15104 /* These are supported directly. */
15111 /* For commutative operators, try to canonicalize the destination
15112 operand to be first in the comparison - this helps reload to
15113 avoid extra moves. */
15114 if (!dest || !rtx_equal_p (dest, *pop1))
15122 /* These are not supported directly. Swap the comparison operands
15123 to transform into something that is supported. */
15127 code = swap_condition (code);
15131 gcc_unreachable ();
15137 /* Detect conditional moves that exactly match min/max operational
15138 semantics. Note that this is IEEE safe, as long as we don't
15139 interchange the operands.
15141 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15142 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): sampled listing -- return type, braces, the handling of
   code == LT/UNGE normalization (around original 15149-15162), and the
   is_min assignment sites are elided.  */
15145 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15146 rtx cmp_op1, rtx if_true, rtx if_false)
15148 enum machine_mode mode;
15154 else if (code == UNGE)
/* Swap arms so the pattern reduces to the supported LT form.  */
15157 if_true = if_false;
/* Min/max only matches when the compared values ARE the selected
   values, in either order.  */
15163 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15165 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15170 mode = GET_MODE (dest);
15172 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15173 but MODE may be a vector mode and thus not appropriate. */
15174 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict-IEEE case: use the IEEE_MIN/IEEE_MAX unspec, which preserves
   SSE min/max operand-order semantics.  */
15176 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15179 if_true = force_reg (mode, if_true);
15180 v = gen_rtvec (2, if_true, if_false);
15181 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed-math case: plain SMIN/SMAX rtl is fine.  */
15185 code = is_min ? SMIN : SMAX;
15186 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15189 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15193 /* Expand an sse vector comparison. Return the register with the result. */
/* NOTE(review): sampled listing -- return type, braces, the guard that
   forces cmp_op0 to a register, the first clause of the dest-reuse
   condition (original ~15206), and the final `return dest;` are elided.  */
15196 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15197 rtx op_true, rtx op_false)
15199 enum machine_mode mode = GET_MODE (dest);
15202 cmp_op0 = force_reg (mode, cmp_op0);
15203 if (!nonimmediate_operand (cmp_op1, mode))
15204 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST would clobber a value still needed.  */
15207 || reg_overlap_mentioned_p (dest, op_true)
15208 || reg_overlap_mentioned_p (dest, op_false))
15209 dest = gen_reg_rtx (mode);
15211 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15212 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15217 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15218 operations. This is used for both scalar and vector conditional moves. */
15221 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15223 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is all-zero, so DEST = CMP & OP_TRUE.  */
15226 if (op_false == CONST0_RTX (mode))
15228 op_true = force_reg (mode, op_true);
15229 x = gen_rtx_AND (mode, cmp, op_true);
15230 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is all-zero, so DEST = ~CMP & OP_FALSE.  */
15232 else if (op_true == CONST0_RTX (mode))
15234 op_false = force_reg (mode, op_false);
15235 x = gen_rtx_NOT (mode, cmp);
15236 x = gen_rtx_AND (mode, x, op_false);
15237 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real conditional-move (pcmov) instruction; emit a single
   IF_THEN_ELSE set instead of the and/andn/or dance.  */
15239 else if (TARGET_SSE5)
15241 rtx pcmov = gen_rtx_SET (mode, dest,
15242 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Generic case: DEST = (OP_TRUE & CMP) | (~CMP & OP_FALSE), built in
   temporaries t2/t3 and combined with IOR.  */
15249 op_true = force_reg (mode, op_true);
15250 op_false = force_reg (mode, op_false);
15252 t2 = gen_reg_rtx (mode);
15254 t3 = gen_reg_rtx (mode);
15258 x = gen_rtx_AND (mode, op_true, cmp);
15259 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15261 x = gen_rtx_NOT (mode, cmp);
15262 x = gen_rtx_AND (mode, x, op_false);
15263 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15265 x = gen_rtx_IOR (mode, t3, t2);
15266 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15270 /* Expand a floating-point conditional move. Return true if successful. */
15273 ix86_expand_fp_movcc (rtx operands[])
15275 enum machine_mode mode = GET_MODE (operands[0]);
15276 enum rtx_code code = GET_CODE (operands[1]);
15277 rtx tmp, compare_op, second_test, bypass_test;
/* SSE scalar-FP path: expand via compare + mask select rather than fcmov.  */
15279 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15281 enum machine_mode cmode;
15283 /* Since we've no cmove for sse registers, don't force bad register
15284 allocation just to gain access to it. Deny movcc when the
15285 comparison mode doesn't match the move mode. */
15286 cmode = GET_MODE (ix86_compare_op0)
15287 if (cmode == VOIDmode)
15288 cmode = GET_MODE (ix86_compare_op1);
15292 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15294 &ix86_compare_op1);
15295 if (code == UNKNOWN)
/* Prefer a direct min/max instruction when the pattern matches.  */
15298 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15299 ix86_compare_op1, operands[2],
/* Otherwise: build the comparison mask, then select with logical ops.  */
15303 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15304 ix86_compare_op1, operands[2], operands[3]);
15305 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15309 /* The floating point conditional move instructions don't directly
15310 support conditions resulting from a signed integer comparison. */
15312 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15314 /* The floating point conditional move instructions don't directly
15315 support signed integer comparisons. */
15317 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition with setcc into a QImode temp, then
   re-compare that temp against zero so fcmov can consume it.  */
15319 gcc_assert (!second_test && !bypass_test);
15320 tmp = gen_reg_rtx (QImode);
15321 ix86_expand_setcc (code, tmp);
15323 ix86_compare_op0 = tmp;
15324 ix86_compare_op1 = const0_rtx;
15325 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* When a bypass/second test exists, the source arm must not live in the
   destination register, since the first cmove would clobber it.  */
15327 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15329 tmp = gen_reg_rtx (mode);
15330 emit_move_insn (tmp, operands[3]);
15333 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15335 tmp = gen_reg_rtx (mode);
15336 emit_move_insn (tmp, operands[2]);
/* Primary conditional move, then up to two fix-up cmoves for the
   unordered bypass/second tests.  */
15340 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15341 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15342 operands[2], operands[3])));
15344 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15345 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15346 operands[3], operands[0])));
15348 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15349 gen_rtx_IF_THEN_ELSE (mode, second_test,
15350 operands[2], operands[0])));
15355 /* Expand a floating-point vector conditional move; a vcond operation
15356 rather than a movcc operation. */
15359 ix86_expand_fp_vcond (rtx operands[])
15361 enum rtx_code code = GET_CODE (operands[3]);
/* Canonicalize the comparison into something SSE supports directly;
   UNKNOWN means the caller must fall back (early return elided).  */
15364 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15365 &operands[4], &operands[5]);
15366 if (code == UNKNOWN)
/* Try to match a hardware min/max first.  */
15369 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15370 operands[5], operands[1], operands[2]))
/* General case: compare to a mask, then blend the two arms.  */
15373 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15374 operands[1], operands[2]);
15375 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15379 /* Expand a signed/unsigned integral vector conditional move. */
15382 ix86_expand_int_vcond (rtx operands[])
15384 enum machine_mode mode = GET_MODE (operands[0]);
15385 enum rtx_code code = GET_CODE (operands[3]);
15386 bool negate = false;
15389 cop0 = operands[4];
15390 cop1 = operands[5];
15392 /* SSE5 supports all of the comparisons on all vector int types. */
15395 /* Canonicalize the comparison to EQ, GT, GTU. */
/* NE/LE/LEU are obtained by reversing and remembering to negate the
   select (the negate flag swaps the arms at the end).  */
15406 code = reverse_condition (code);
15412 code = reverse_condition (code);
/* LT/LTU become GT/GTU by swapping the compare operands.  */
15418 code = swap_condition (code);
15419 x = cop0, cop0 = cop1, cop1 = x;
15423 gcc_unreachable ();
15426 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15427 if (mode == V2DImode)
15432 /* SSE4.1 supports EQ. */
15433 if (!TARGET_SSE4_1)
15439 /* SSE4.2 supports GT/GTU. */
15440 if (!TARGET_SSE4_2)
15445 gcc_unreachable ();
15449 /* Unsigned parallel compare is not supported by the hardware. Play some
15450 tricks to turn this into a signed comparison against 0. */
15453 cop0 = force_reg (mode, cop0);
15462 /* Perform a parallel modulo subtraction. */
15463 t1 = gen_reg_rtx (mode);
15464 emit_insn ((mode == V4SImode
15466 : gen_subv2di3) (t1, cop0, cop1));
15468 /* Extract the original sign bit of op0. */
15469 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15471 t2 = gen_reg_rtx (mode);
15472 emit_insn ((mode == V4SImode
15474 : gen_andv2di3) (t2, cop0, mask));
15476 /* XOR it back into the result of the subtraction. This results
15477 in the sign bit set iff we saw unsigned underflow. */
15478 x = gen_reg_rtx (mode);
15479 emit_insn ((mode == V4SImode
15481 : gen_xorv2di3) (x, t1, t2));
/* Narrower elements: a single unsigned saturating subtract leaves a
   nonzero value exactly where cop0 > cop1.  */
15489 /* Perform a parallel unsigned saturating subtraction. */
15490 x = gen_reg_rtx (mode);
15491 emit_insn (gen_rtx_SET (VOIDmode, x,
15492 gen_rtx_US_MINUS (mode, cop0, cop1)));
15499 gcc_unreachable ();
/* After the trick above the comparison is against zero.  */
15503 cop1 = CONST0_RTX (mode);
/* The 1+negate / 2-negate indexing swaps the arms when the condition
   was reversed during canonicalization.  */
15507 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15508 operands[1+negate], operands[2-negate]);
15510 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15511 operands[2-negate]);
15515 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15516 true if we should do zero extension, else sign extension. HIGH_P is
15517 true if we want the N/2 high elements, else the low elements. */
15520 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15522 enum machine_mode imode = GET_MODE (operands[1]);
15523 rtx (*unpack)(rtx, rtx, rtx);
/* Pick the interleave (punpckh*/punpckl*) generator for the source
   element width and requested half.  */
15530 unpack = gen_vec_interleave_highv16qi;
15532 unpack = gen_vec_interleave_lowv16qi;
15536 unpack = gen_vec_interleave_highv8hi;
15538 unpack = gen_vec_interleave_lowv8hi;
15542 unpack = gen_vec_interleave_highv4si;
15544 unpack = gen_vec_interleave_lowv4si;
15547 gcc_unreachable ();
15550 dest = gen_lowpart (imode, operands[0]);
/* Zero extension interleaves with zero; sign extension interleaves with
   a mask of the sign bits, built as 0 > op1.  */
15553 se = force_reg (imode, CONST0_RTX (imode));
15555 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15556 operands[1], pc_rtx, pc_rtx);
15558 emit_insn (unpack (dest, operands[1], se));
15561 /* This function performs the same task as ix86_expand_sse_unpack,
15562 but with SSE4.1 instructions. */
15565 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15567 enum machine_mode imode = GET_MODE (operands[1]);
15568 rtx (*unpack)(rtx, rtx);
/* SSE4.1 has real widening moves (pmovzx*/pmovsx*); choose by source
   element width and signedness.  */
15575 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15577 unpack = gen_sse4_1_extendv8qiv8hi2;
15581 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15583 unpack = gen_sse4_1_extendv4hiv4si2;
15587 unpack = gen_sse4_1_zero_extendv2siv2di2;
15589 unpack = gen_sse4_1_extendv2siv2di2;
15592 gcc_unreachable ();
15595 dest = operands[0];
/* pmov*x only reads the low half, so for HIGH_P shift the upper 8 bytes
   down first via a TImode logical right shift.  */
15598 /* Shift higher 8 bytes to lower 8 bytes. */
15599 src = gen_reg_rtx (imode);
15600 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15601 gen_lowpart (TImode, operands[1]),
15607 emit_insn (unpack (dest, src));
15610 /* This function performs the same task as ix86_expand_sse_unpack,
15611 but with sse5 instructions. */
15614 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15616 enum machine_mode imode = GET_MODE (operands[1]);
15617 int pperm_bytes[16];
/* H selects the byte offset of the requested half of the source.  */
15619 int h = (high_p) ? 8 : 0;
15622 rtvec v = rtvec_alloc (16);
15625 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each result halfword is a source byte followed by
   either a zero byte (unsigned) or a replicated sign byte.  */
15630 vs = rtvec_alloc (8);
15631 h2 = (high_p) ? 8 : 0;
15632 for (i = 0; i < 8; i++)
15634 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15635 pperm_bytes[2*i+1] = ((unsigned_p)
15637 : PPERM_SIGN | PPERM_SRC2 | i | h);
15640 for (i = 0; i < 16; i++)
15641 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15643 for (i = 0; i < 8; i++)
15644 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15646 p = gen_rtx_PARALLEL (VOIDmode, vs);
15647 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15649 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15651 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes then two extension bytes per word.  */
15655 vs = rtvec_alloc (4);
15656 h2 = (high_p) ? 4 : 0;
15657 for (i = 0; i < 4; i++)
15659 sign_extend = ((unsigned_p)
15661 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15662 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15663 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15664 pperm_bytes[4*i+2] = sign_extend;
15665 pperm_bytes[4*i+3] = sign_extend;
15668 for (i = 0; i < 16; i++)
15669 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15671 for (i = 0; i < 4; i++)
15672 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15674 p = gen_rtx_PARALLEL (VOIDmode, vs);
15675 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15677 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15679 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes then four extension bytes per
   doubleword.  */
15683 vs = rtvec_alloc (2);
15684 h2 = (high_p) ? 2 : 0;
15685 for (i = 0; i < 2; i++)
15687 sign_extend = ((unsigned_p)
15689 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15690 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15691 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15692 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15693 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15694 pperm_bytes[8*i+4] = sign_extend;
15695 pperm_bytes[8*i+5] = sign_extend;
15696 pperm_bytes[8*i+6] = sign_extend;
15697 pperm_bytes[8*i+7] = sign_extend;
15700 for (i = 0; i < 16; i++)
15701 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15703 for (i = 0; i < 2; i++)
15704 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15706 p = gen_rtx_PARALLEL (VOIDmode, vs);
15707 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15709 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15711 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15715 gcc_unreachable ();
15721 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15722 next narrower integer vector type */
15724 ix86_expand_sse5_pack (rtx operands[3])
15726 enum machine_mode imode = GET_MODE (operands[0]);
15727 int pperm_bytes[16];
15729 rtvec v = rtvec_alloc (16);
15731 rtx op0 = operands[0];
15732 rtx op1 = operands[1];
15733 rtx op2 = operands[2];
/* V8HI pair -> V16QI: take every other byte (the low byte of each
   halfword) from op1 into the low 8 lanes, from op2 into the high 8.  */
15738 for (i = 0; i < 8; i++)
15740 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15741 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15744 for (i = 0; i < 16; i++)
15745 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15747 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15748 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI pair -> V8HI: low two bytes of each word.  */
15752 for (i = 0; i < 4; i++)
15754 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15755 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15756 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15757 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15760 for (i = 0; i < 16; i++)
15761 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15763 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15764 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI pair -> V4SI: low four bytes of each doubleword.  */
15768 for (i = 0; i < 2; i++)
15770 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15771 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15772 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15773 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15774 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15775 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15776 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15777 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15780 for (i = 0; i < 16; i++)
15781 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15783 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15784 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15788 gcc_unreachable ();
15794 /* Expand conditional increment or decrement using adb/sbb instructions.
15795 The default case using setcc followed by the conditional move can be
15796 done by generic code. */
15798 ix86_expand_int_addcc (rtx operands[])
15800 enum rtx_code code = GET_CODE (operands[1]);
15802 rtx val = const0_rtx;
15803 bool fpcmp = false;
15804 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +/-1 adjustments can be done via the carry flag; anything else
   is left to the generic expander.  */
15806 if (operands[3] != const1_rtx
15807 && operands[3] != constm1_rtx)
15809 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15810 ix86_compare_op1, &compare_op))
15812 code = GET_CODE (compare_op);
/* FP compares need their code translated to the equivalent integer
   condition, and reversal must preserve unordered behavior.  */
15814 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15815 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15818 code = ix86_fp_compare_code_to_integer (code);
15825 PUT_CODE (compare_op,
15826 reverse_condition_maybe_unordered
15827 (GET_CODE (compare_op)));
15829 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15831 PUT_MODE (compare_op, mode);
15833 /* Construct either adc or sbb insn. */
15834 if ((code == LTU) == (operands[3] == constm1_rtx))
15836 switch (GET_MODE (operands[0]))
15839 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15842 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15845 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15848 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15851 gcc_unreachable ();
/* Opposite pairing: use add-with-carry instead.  */
15856 switch (GET_MODE (operands[0]))
15859 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15862 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15865 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15868 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15871 gcc_unreachable ();
15874 return 1; /* DONE */
15878 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
15879 works for floating pointer parameters and nonoffsetable memories.
15880 For pushes, it returns just stack offsets; the values will be saved
15881 in the right order. Maximally three parts are generated. */
15884 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: 32-bit target uses 4-byte words (XFmode -> 3 parts);
   64-bit branch (elided) uses 8-byte words.  */
15889 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15891 size = (GET_MODE_SIZE (mode) + 4) / 8;
15893 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15894 gcc_assert (size >= 2 && size <= 4);
15896 /* Optimize constant pool reference to immediates. This is used by fp
15897 moves, that force all constants to memory to allow combining. */
15898 if (MEM_P (operand) && MEM_READONLY_P (operand))
15900 rtx tmp = maybe_get_pool_constant (operand);
15905 if (MEM_P (operand) && !offsettable_memref_p (operand))
15907 /* The only non-offsetable memories we handle are pushes. */
15908 int ok = push_operand (operand, VOIDmode);
/* For a push, every part is the same pre-dec address; the caller emits
   them in the correct order.  */
15912 operand = copy_rtx (operand);
15913 PUT_MODE (operand, Pmode);
15914 parts[0] = parts[1] = parts[2] = parts[3] = operand;
15918 if (GET_CODE (operand) == CONST_VECTOR)
15920 enum machine_mode imode = int_mode_for_mode (mode);
15921 /* Caution: if we looked through a constant pool memory above,
15922 the operand may actually have a different mode now. That's
15923 ok, since we want to pun this all the way back to an integer. */
15924 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15925 gcc_assert (operand != NULL);
/* 32-bit target section (enclosing condition elided).  */
15931 if (mode == DImode)
15932 split_di (&operand, 1, &parts[0], &parts[1]);
15937 if (REG_P (operand))
15939 gcc_assert (reload_completed);
15940 for (i = 0; i < size; i++)
15941 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
15943 else if (offsettable_memref_p (operand))
15945 operand = adjust_address (operand, SImode, 0);
15946 parts[0] = operand;
15947 for (i = 1; i < size; i++)
15948 parts[i] = adjust_address (operand, SImode, 4 * i);
15950 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Decompose an FP constant into its target-format 32-bit words.  */
15955 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15959 real_to_target (l, &r, mode);
15960 parts[3] = gen_int_mode (l[3], SImode);
15961 parts[2] = gen_int_mode (l[2], SImode);
15964 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15965 parts[2] = gen_int_mode (l[2], SImode);
15968 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15971 gcc_unreachable ();
15973 parts[1] = gen_int_mode (l[1], SImode);
15974 parts[0] = gen_int_mode (l[0], SImode);
15977 gcc_unreachable ();
/* 64-bit target section: parts are DImode, upper part SImode for XF.  */
15982 if (mode == TImode)
15983 split_ti (&operand, 1, &parts[0], &parts[1]);
15984 if (mode == XFmode || mode == TFmode)
15986 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15987 if (REG_P (operand))
15989 gcc_assert (reload_completed);
15990 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15991 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15993 else if (offsettable_memref_p (operand))
15995 operand = adjust_address (operand, DImode, 0);
15996 parts[0] = operand;
15997 parts[1] = adjust_address (operand, upper_mode, 8);
15999 else if (GET_CODE (operand) == CONST_DOUBLE)
16004 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16005 real_to_target (l, &r, mode);
16007 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16008 if (HOST_BITS_PER_WIDE_INT >= 64)
16011 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16012 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16015 parts[0] = immed_double_const (l[0], l[1], DImode);
16017 if (upper_mode == SImode)
16018 parts[1] = gen_int_mode (l[2], SImode);
16019 else if (HOST_BITS_PER_WIDE_INT >= 64)
16022 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16023 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16026 parts[1] = immed_double_const (l[2], l[3], DImode);
16029 gcc_unreachable ();
16036 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16037 Return false when normal moves are needed; true when all required
16038 insns have been emitted. Operands 2-4 contain the input values
16039 int the correct order; operands 5-7 contain the output values. */
16042 ix86_split_long_move (rtx operands[])
16047 int collisions = 0;
16048 enum machine_mode mode = GET_MODE (operands[0]);
16049 bool collisionparts[4];
16051 /* The DFmode expanders may ask us to move double.
16052 For 64bit target this is single move. By hiding the fact
16053 here we simplify i386.md splitters. */
16054 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16056 /* Optimize constant pool reference to immediates. This is used by
16057 fp moves, that force all constants to memory to allow combining. */
16059 if (MEM_P (operands[1])
16060 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16061 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16062 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16063 if (push_operand (operands[0], VOIDmode))
16065 operands[0] = copy_rtx (operands[0]);
16066 PUT_MODE (operands[0], Pmode);
/* 8-byte move on 64-bit is just one DImode move.  */
16069 operands[0] = gen_lowpart (DImode, operands[0]);
16070 operands[1] = gen_lowpart (DImode, operands[1]);
16071 emit_move_insn (operands[0], operands[1]);
16075 /* The only non-offsettable memory we handle is push. */
16076 if (push_operand (operands[0], VOIDmode))
16079 gcc_assert (!MEM_P (operands[0])
16080 || offsettable_memref_p (operands[0]));
16082 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16083 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16085 /* When emitting push, take care for source operands on the stack. */
16086 if (push && MEM_P (operands[1])
16087 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16088 for (i = 0; i < nparts - 1; i++)
16089 part[1][i] = change_address (part[1][i],
16090 GET_MODE (part[1][i]),
16091 XEXP (part[1][i + 1], 0));
16093 /* We need to do copy in the right order in case an address register
16094 of the source overlaps the destination. */
16095 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record which destination parts collide with the source address.  */
16099 for (i = 0; i < nparts; i++)
16102 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16103 if (collisionparts[i])
16107 /* Collision in the middle part can be handled by reordering. */
16108 if (collisions == 1 && nparts == 3 && collisionparts [1])
16110 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16111 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16113 else if (collisions == 1
16115 && (collisionparts [1] || collisionparts [2]))
16117 if (collisionparts [1])
16119 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16120 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16124 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16125 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16129 /* If there are more collisions, we can't handle it by reordering.
16130 Do an lea to the last part and use only one colliding move. */
16131 else if (collisions > 1)
16137 base = part[0][nparts - 1];
16139 /* Handle the case when the last part isn't valid for lea.
16140 Happens in 64-bit mode storing the 12-byte XFmode. */
16141 if (GET_MODE (base) != Pmode)
16142 base = gen_rtx_REG (Pmode, REGNO (base));
16144 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16145 part[1][0] = replace_equiv_address (part[1][0], base);
16146 for (i = 1; i < nparts; i++)
16148 tmp = plus_constant (base, UNITS_PER_WORD * i);
16149 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path (surrounding control flow elided): emit the high parts
   first so they land at the higher addresses.  */
16160 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16161 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16162 emit_move_insn (part[0][2], part[1][2]);
16164 else if (nparts == 4)
16166 emit_move_insn (part[0][3], part[1][3]);
16167 emit_move_insn (part[0][2], part[1][2]);
16172 /* In 64bit mode we don't have 32bit push available. In case this is
16173 register, it is OK - we will just use larger counterpart. We also
16174 retype memory - these comes from attempt to avoid REX prefix on
16175 moving of second half of TFmode value. */
16176 if (GET_MODE (part[1][1]) == SImode)
16178 switch (GET_CODE (part[1][1]))
16181 part[1][1] = adjust_address (part[1][1], DImode, 0);
16185 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16189 gcc_unreachable ();
16192 if (GET_MODE (part[1][0]) == SImode)
16193 part[1][0] = part[1][1];
16196 emit_move_insn (part[0][1], part[1][1]);
16197 emit_move_insn (part[0][0], part[1][0]);
16201 /* Choose correct order to not overwrite the source before it is copied. */
16202 if ((REG_P (part[0][0])
16203 && REG_P (part[1][1])
16204 && (REGNO (part[0][0]) == REGNO (part[1][1])
16206 && REGNO (part[0][0]) == REGNO (part[1][2]))
16208 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16210 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: copy high-to-low.  */
16212 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16214 operands[2 + i] = part[0][j];
16215 operands[6 + i] = part[1][j];
16220 for (i = 0; i < nparts; i++)
16222 operands[2 + i] = part[0][i];
16223 operands[6 + i] = part[1][i];
16227 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16228 if (optimize_insn_for_size_p ())
16230 for (j = 0; j < nparts - 1; j++)
16231 if (CONST_INT_P (operands[6 + j])
16232 && operands[6 + j] != const0_rtx
16233 && REG_P (operands[2 + j]))
16234 for (i = j; i < nparts - 1; i++)
16235 if (CONST_INT_P (operands[7 + i])
16236 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16237 operands[7 + i] = operands[2 + j];
16240 for (i = 0; i < nparts; i++)
16241 emit_move_insn (operands[2 + i], operands[6 + i]);
16246 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16247 left shift by a constant, either using a single shift or
16248 a sequence of add instructions. */
16251 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* Shift by 1 is a single self-add (condition line elided).  */
16255 emit_insn ((mode == DImode
16257 : gen_adddi3) (operand, operand, operand));
/* Small counts: repeated self-adds when their total cost beats one
   constant shift (and we're not optimizing for size).  */
16259 else if (!optimize_insn_for_size_p ()
16260 && count * ix86_cost->add <= ix86_cost->shift_const)
16263 for (i=0; i<count; i++)
16265 emit_insn ((mode == DImode
16267 : gen_adddi3) (operand, operand, operand));
/* Otherwise a single immediate shift.  */
16271 emit_insn ((mode == DImode
16273 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word (DImode on 32-bit, TImode on 64-bit) left shift
   into operations on the two single-word halves.  SCRATCH, if non-null,
   enables the cmove-based variable-count adjustment.  */
16277 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16279 rtx low[2], high[2];
16281 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: fully resolved at expand time.  */
16283 if (CONST_INT_P (operands[2]))
16285 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16286 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16288 if (count >= single_width)
/* Count >= word size: low word becomes 0, old low goes to high.  */
16290 emit_move_insn (high[0], low[1]);
16291 emit_move_insn (low[0], const0_rtx);
16293 if (count > single_width)
16294 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Count < word size: shld the carried-out bits into high, shift low.  */
16298 if (!rtx_equal_p (operands[0], operands[1]))
16299 emit_move_insn (operands[0], operands[1]);
16300 emit_insn ((mode == DImode
16302 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16303 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
16308 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16310 if (operands[1] == const1_rtx)
16312 /* Assuming we've chosen a QImode capable registers, then 1 << N
16313 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16314 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16316 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16318 ix86_expand_clear (low[0]);
16319 ix86_expand_clear (high[0]);
16320 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16322 d = gen_lowpart (QImode, low[0]);
16323 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16324 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16325 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16327 d = gen_lowpart (QImode, high[0]);
16328 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16329 s = gen_rtx_NE (QImode, flags, const0_rtx);
16330 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16333 /* Otherwise, we can get the same results by manually performing
16334 a bit extract operation on bit 5/6, and then performing the two
16335 shifts. The two methods of getting 0/1 into low/high are exactly
16336 the same size. Avoiding the shift in the bit extract case helps
16337 pentium4 a bit; no one else seems to care much either way. */
16342 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16343 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16345 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16346 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / 6 (TImode) of the count: it tells whether
   the shift crosses the word boundary.  */
16348 emit_insn ((mode == DImode
16350 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16351 emit_insn ((mode == DImode
16353 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16354 emit_move_insn (low[0], high[0]);
16355 emit_insn ((mode == DImode
16357 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16360 emit_insn ((mode == DImode
16362 : gen_ashldi3) (low[0], low[0], operands[2]));
16363 emit_insn ((mode == DImode
16365 : gen_ashldi3) (high[0], high[0], operands[2]));
16369 if (operands[1] == constm1_rtx)
16371 /* For -1 << N, we can avoid the shld instruction, because we
16372 know that we're shifting 0...31/63 ones into a -1. */
16373 emit_move_insn (low[0], constm1_rtx)
16374 if (optimize_insn_for_size_p ())
16375 emit_move_insn (high[0], low[0]);
16377 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld + shift, then fix up when the
   count is >= the word width.  */
16381 if (!rtx_equal_p (operands[0], operands[1]))
16382 emit_move_insn (operands[0], operands[1]);
16384 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16385 emit_insn ((mode == DImode
16387 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16390 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Adjustment for count >= word size: cmove variant needs SCRATCH,
   otherwise the branchy adj_2 pattern is used.  */
16392 if (TARGET_CMOVE && scratch)
16394 ix86_expand_clear (scratch);
16395 emit_insn ((mode == DImode
16396 ? gen_x86_shift_adj_1
16397 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16401 emit_insn ((mode == DImode
16402 ? gen_x86_shift_adj_2
16403 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word ops.
   Mirrors ix86_split_ashl; the sign must be propagated into the
   vacated high word.  */
16407 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16409 rtx low[2], high[2];
16411 const int single_width = mode == DImode ? 32 : 64;
16413 if (CONST_INT_P (operands[2]))
16415 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16416 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximal count: both words become the sign-bit broadcast.  */
16418 if (count == single_width * 2 - 1)
16420 emit_move_insn (high[0], high[1]);
16421 emit_insn ((mode == DImode
16423 : gen_ashrdi3) (high[0], high[0],
16424 GEN_INT (single_width - 1)));
16425 emit_move_insn (low[0], high[0]);
/* Count >= word size: low gets the old high (further shifted if
   needed), high becomes the sign broadcast.  */
16428 else if (count >= single_width)
16430 emit_move_insn (low[0], high[1]);
16431 emit_move_insn (high[0], low[0]);
16432 emit_insn ((mode == DImode
16434 : gen_ashrdi3) (high[0], high[0],
16435 GEN_INT (single_width - 1)));
16436 if (count > single_width)
16437 emit_insn ((mode == DImode
16439 : gen_ashrdi3) (low[0], low[0],
16440 GEN_INT (count - single_width)));
/* Count < word size: shrd into low, arithmetic shift of high.  */
16444 if (!rtx_equal_p (operands[0], operands[1]))
16445 emit_move_insn (operands[0], operands[1]);
16446 emit_insn ((mode == DImode
16448 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16449 emit_insn ((mode == DImode
16451 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
16456 if (!rtx_equal_p (operands[0], operands[1]))
16457 emit_move_insn (operands[0], operands[1]);
16459 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16461 emit_insn ((mode == DImode
16463 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16464 emit_insn ((mode == DImode
16466 : gen_ashrdi3) (high[0], high[0], operands[2]));
/* Fix-up for count >= word size: scratch holds the sign broadcast for
   the cmove-based adj_1; otherwise use the branchy adj_3.  */
16468 if (TARGET_CMOVE && scratch)
16470 emit_move_insn (scratch, high[0]);
16471 emit_insn ((mode == DImode
16473 : gen_ashrdi3) (scratch, scratch,
16474 GEN_INT (single_width - 1)));
16475 emit_insn ((mode == DImode
16476 ? gen_x86_shift_adj_1
16477 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16481 emit_insn ((mode == DImode
16482 ? gen_x86_shift_adj_3
16483 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word ops.
   Like ix86_split_ashr but zero-fills the vacated high word.  */
16488 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16490 rtx low[2], high[2];
16492 const int single_width = mode == DImode ? 32 : 64;
16494 if (CONST_INT_P (operands[2]))
16496 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16497 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low gets old high, high is cleared.  */
16499 if (count >= single_width)
16501 emit_move_insn (low[0], high[1]);
16502 ix86_expand_clear (high[0]);
16504 if (count > single_width)
16505 emit_insn ((mode == DImode
16507 : gen_lshrdi3) (low[0], low[0],
16508 GEN_INT (count - single_width)));
/* Count < word size: shrd into low, logical shift of high.  */
16512 if (!rtx_equal_p (operands[0], operands[1]))
16513 emit_move_insn (operands[0], operands[1]);
16514 emit_insn ((mode == DImode
16516 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16517 emit_insn ((mode == DImode
16519 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
16524 if (!rtx_equal_p (operands[0], operands[1]))
16525 emit_move_insn (operands[0], operands[1]);
16527 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16529 emit_insn ((mode == DImode
16531 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16532 emit_insn ((mode == DImode
16534 : gen_lshrdi3) (high[0], high[0], operands[2]));
16536 /* Heh. By reversing the arguments, we can reuse this pattern. */
16537 if (TARGET_CMOVE && scratch)
16539 ix86_expand_clear (scratch);
16540 emit_insn ((mode == DImode
16541 ? gen_x86_shift_adj_1
16542 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16546 emit_insn ((mode == DImode
16547 ? gen_x86_shift_adj_2
16548 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16552 /* Predict just emitted jump instruction to be taken with probability PROB. */
16554 predict_jump (int prob)
16556 rtx insn = get_last_insn ();
16557 gcc_assert (JUMP_P (insn));
16559 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16564 /* Helper function for the string operations below. Dest VARIABLE whether
16565 it is aligned to VALUE bytes. If true, jump to the label. */
16567 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16569 rtx label = gen_label_rtx ();
16570 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
16571 if (GET_MODE (variable) == DImode)
16572 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16574 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16575 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16578 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16580 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16584 /* Adjust COUNTER by the VALUE. */
16586 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16588 if (GET_MODE (countreg) == DImode)
16589 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16591 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16594 /* Zero extend possibly SImode EXP to Pmode register. */
16596 ix86_zero_extend_to_Pmode (rtx exp)
16599 if (GET_MODE (exp) == VOIDmode)
16600 return force_reg (Pmode, exp);
16601 if (GET_MODE (exp) == Pmode)
16602 return copy_to_mode_reg (Pmode, exp);
16603 r = gen_reg_rtx (Pmode);
16604 emit_insn (gen_zero_extendsidi2 (r, exp));
16608 /* Divide COUNTREG by SCALE. */
16610 scale_counter (rtx countreg, int scale)
16613 rtx piece_size_mask;
16617 if (CONST_INT_P (countreg))
16618 return GEN_INT (INTVAL (countreg) / scale);
16619 gcc_assert (REG_P (countreg));
16621 piece_size_mask = GEN_INT (scale - 1);
16622 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16623 GEN_INT (exact_log2 (scale)),
16624 NULL, 1, OPTAB_DIRECT);
16628 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16629 DImode for constant loop counts. */
16631 static enum machine_mode
16632 counter_mode (rtx count_exp)
16634 if (GET_MODE (count_exp) != VOIDmode)
16635 return GET_MODE (count_exp);
16636 if (GET_CODE (count_exp) != CONST_INT)
16638 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16643 /* When SRCPTR is non-NULL, output simple loop to move memory
16644 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
16645 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
16646 equivalent loop to set memory by VALUE (supposed to be in MODE).
16648 The size is rounded down to whole number of chunk size moved at once.
16649 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
16653 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16654 rtx destptr, rtx srcptr, rtx value,
16655 rtx count, enum machine_mode mode, int unroll,
16658 rtx out_label, top_label, iter, tmp;
16659 enum machine_mode iter_mode = counter_mode (count);
16660 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16661 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16667 top_label = gen_label_rtx ();
16668 out_label = gen_label_rtx ();
16669 iter = gen_reg_rtx (iter_mode);
16671 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16672 NULL, 1, OPTAB_DIRECT);
16673 /* Those two should combine. */
16674 if (piece_size == const1_rtx)
16676 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16678 predict_jump (REG_BR_PROB_BASE * 10 / 100);
16680 emit_move_insn (iter, const0_rtx);
16682 emit_label (top_label);
16684 tmp = convert_modes (Pmode, iter_mode, iter, true);
16685 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16686 destmem = change_address (destmem, mode, x_addr);
16690 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16691 srcmem = change_address (srcmem, mode, y_addr);
16693 /* When unrolling for chips that reorder memory reads and writes,
16694 we can save registers by using single temporary.
16695 Also using 4 temporaries is overkill in 32bit mode. */
16696 if (!TARGET_64BIT && 0)
16698 for (i = 0; i < unroll; i++)
16703 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16705 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16707 emit_move_insn (destmem, srcmem);
16713 gcc_assert (unroll <= 4);
16714 for (i = 0; i < unroll; i++)
16716 tmpreg[i] = gen_reg_rtx (mode);
16720 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16722 emit_move_insn (tmpreg[i], srcmem);
16724 for (i = 0; i < unroll; i++)
16729 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16731 emit_move_insn (destmem, tmpreg[i]);
16736 for (i = 0; i < unroll; i++)
16740 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16741 emit_move_insn (destmem, value);
16744 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16745 true, OPTAB_LIB_WIDEN);
16747 emit_move_insn (iter, tmp);
16749 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
16751 if (expected_size != -1)
16753 expected_size /= GET_MODE_SIZE (mode) * unroll;
16754 if (expected_size == 0)
16756 else if (expected_size > REG_BR_PROB_BASE)
16757 predict_jump (REG_BR_PROB_BASE - 1);
16759 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16762 predict_jump (REG_BR_PROB_BASE * 80 / 100);
16763 iter = ix86_zero_extend_to_Pmode (iter);
16764 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16765 true, OPTAB_LIB_WIDEN);
16766 if (tmp != destptr)
16767 emit_move_insn (destptr, tmp);
16770 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16771 true, OPTAB_LIB_WIDEN);
16773 emit_move_insn (srcptr, tmp);
16775 emit_label (out_label);
16778 /* Output "rep; mov" instruction.
16779 Arguments have same meaning as for previous function */
16781 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16782 rtx destptr, rtx srcptr,
16784 enum machine_mode mode)
16790 /* If the size is known, it is shorter to use rep movs. */
16791 if (mode == QImode && CONST_INT_P (count)
16792 && !(INTVAL (count) & 3))
16795 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16796 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16797 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16798 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
16799 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16800 if (mode != QImode)
16802 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16803 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16804 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16805 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16806 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16807 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16811 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16812 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16814 if (CONST_INT_P (count))
16816 count = GEN_INT (INTVAL (count)
16817 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
16818 destmem = shallow_copy_rtx (destmem);
16819 srcmem = shallow_copy_rtx (srcmem);
16820 set_mem_size (destmem, count);
16821 set_mem_size (srcmem, count);
16825 if (MEM_SIZE (destmem))
16826 set_mem_size (destmem, NULL_RTX);
16827 if (MEM_SIZE (srcmem))
16828 set_mem_size (srcmem, NULL_RTX);
16830 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16834 /* Output "rep; stos" instruction.
16835 Arguments have same meaning as for previous function */
16837 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16838 rtx count, enum machine_mode mode,
16844 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16845 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16846 value = force_reg (mode, gen_lowpart (mode, value));
16847 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16848 if (mode != QImode)
16850 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16851 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16852 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16855 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16856 if (orig_value == const0_rtx && CONST_INT_P (count))
16858 count = GEN_INT (INTVAL (count)
16859 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
16860 destmem = shallow_copy_rtx (destmem);
16861 set_mem_size (destmem, count);
16863 else if (MEM_SIZE (destmem))
16864 set_mem_size (destmem, NULL_RTX);
16865 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
16869 emit_strmov (rtx destmem, rtx srcmem,
16870 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16872 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16873 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16874 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16877 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
16879 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16880 rtx destptr, rtx srcptr, rtx count, int max_size)
16883 if (CONST_INT_P (count))
16885 HOST_WIDE_INT countval = INTVAL (count);
16888 if ((countval & 0x10) && max_size > 16)
16892 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16893 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16896 gcc_unreachable ();
16899 if ((countval & 0x08) && max_size > 8)
16902 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16905 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16906 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16910 if ((countval & 0x04) && max_size > 4)
16912 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16915 if ((countval & 0x02) && max_size > 2)
16917 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16920 if ((countval & 0x01) && max_size > 1)
16922 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
16929 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16930 count, 1, OPTAB_DIRECT);
16931 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16932 count, QImode, 1, 4);
16936 /* When there are stringops, we can cheaply increase dest and src pointers.
16937 Otherwise we save code size by maintaining offset (zero is readily
16938 available from preceding rep operation) and using x86 addressing modes.
16940 if (TARGET_SINGLE_STRINGOP)
16944 rtx label = ix86_expand_aligntest (count, 4, true);
16945 src = change_address (srcmem, SImode, srcptr);
16946 dest = change_address (destmem, SImode, destptr);
16947 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16948 emit_label (label);
16949 LABEL_NUSES (label) = 1;
16953 rtx label = ix86_expand_aligntest (count, 2, true);
16954 src = change_address (srcmem, HImode, srcptr);
16955 dest = change_address (destmem, HImode, destptr);
16956 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16957 emit_label (label);
16958 LABEL_NUSES (label) = 1;
16962 rtx label = ix86_expand_aligntest (count, 1, true);
16963 src = change_address (srcmem, QImode, srcptr);
16964 dest = change_address (destmem, QImode, destptr);
16965 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16966 emit_label (label);
16967 LABEL_NUSES (label) = 1;
16972 rtx offset = force_reg (Pmode, const0_rtx);
16977 rtx label = ix86_expand_aligntest (count, 4, true);
16978 src = change_address (srcmem, SImode, srcptr);
16979 dest = change_address (destmem, SImode, destptr);
16980 emit_move_insn (dest, src);
16981 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16982 true, OPTAB_LIB_WIDEN);
16984 emit_move_insn (offset, tmp);
16985 emit_label (label);
16986 LABEL_NUSES (label) = 1;
16990 rtx label = ix86_expand_aligntest (count, 2, true);
16991 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16992 src = change_address (srcmem, HImode, tmp);
16993 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16994 dest = change_address (destmem, HImode, tmp);
16995 emit_move_insn (dest, src);
16996 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16997 true, OPTAB_LIB_WIDEN);
16999 emit_move_insn (offset, tmp);
17000 emit_label (label);
17001 LABEL_NUSES (label) = 1;
17005 rtx label = ix86_expand_aligntest (count, 1, true);
17006 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17007 src = change_address (srcmem, QImode, tmp);
17008 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17009 dest = change_address (destmem, QImode, tmp);
17010 emit_move_insn (dest, src);
17011 emit_label (label);
17012 LABEL_NUSES (label) = 1;
17017 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17019 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17020 rtx count, int max_size)
17023 expand_simple_binop (counter_mode (count), AND, count,
17024 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17025 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17026 gen_lowpart (QImode, value), count, QImode,
17030 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17032 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17036 if (CONST_INT_P (count))
17038 HOST_WIDE_INT countval = INTVAL (count);
17041 if ((countval & 0x10) && max_size > 16)
17045 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17046 emit_insn (gen_strset (destptr, dest, value));
17047 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17048 emit_insn (gen_strset (destptr, dest, value));
17051 gcc_unreachable ();
17054 if ((countval & 0x08) && max_size > 8)
17058 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17059 emit_insn (gen_strset (destptr, dest, value));
17063 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17064 emit_insn (gen_strset (destptr, dest, value));
17065 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17066 emit_insn (gen_strset (destptr, dest, value));
17070 if ((countval & 0x04) && max_size > 4)
17072 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17073 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17076 if ((countval & 0x02) && max_size > 2)
17078 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17079 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17082 if ((countval & 0x01) && max_size > 1)
17084 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17085 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17092 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17097 rtx label = ix86_expand_aligntest (count, 16, true);
17100 dest = change_address (destmem, DImode, destptr);
17101 emit_insn (gen_strset (destptr, dest, value));
17102 emit_insn (gen_strset (destptr, dest, value));
17106 dest = change_address (destmem, SImode, destptr);
17107 emit_insn (gen_strset (destptr, dest, value));
17108 emit_insn (gen_strset (destptr, dest, value));
17109 emit_insn (gen_strset (destptr, dest, value));
17110 emit_insn (gen_strset (destptr, dest, value));
17112 emit_label (label);
17113 LABEL_NUSES (label) = 1;
17117 rtx label = ix86_expand_aligntest (count, 8, true);
17120 dest = change_address (destmem, DImode, destptr);
17121 emit_insn (gen_strset (destptr, dest, value));
17125 dest = change_address (destmem, SImode, destptr);
17126 emit_insn (gen_strset (destptr, dest, value));
17127 emit_insn (gen_strset (destptr, dest, value));
17129 emit_label (label);
17130 LABEL_NUSES (label) = 1;
17134 rtx label = ix86_expand_aligntest (count, 4, true);
17135 dest = change_address (destmem, SImode, destptr);
17136 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17137 emit_label (label);
17138 LABEL_NUSES (label) = 1;
17142 rtx label = ix86_expand_aligntest (count, 2, true);
17143 dest = change_address (destmem, HImode, destptr);
17144 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17145 emit_label (label);
17146 LABEL_NUSES (label) = 1;
17150 rtx label = ix86_expand_aligntest (count, 1, true);
17151 dest = change_address (destmem, QImode, destptr);
17152 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17153 emit_label (label);
17154 LABEL_NUSES (label) = 1;
17158 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17159 DESIRED_ALIGNMENT. */
17161 expand_movmem_prologue (rtx destmem, rtx srcmem,
17162 rtx destptr, rtx srcptr, rtx count,
17163 int align, int desired_alignment)
17165 if (align <= 1 && desired_alignment > 1)
17167 rtx label = ix86_expand_aligntest (destptr, 1, false);
17168 srcmem = change_address (srcmem, QImode, srcptr);
17169 destmem = change_address (destmem, QImode, destptr);
17170 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17171 ix86_adjust_counter (count, 1);
17172 emit_label (label);
17173 LABEL_NUSES (label) = 1;
17175 if (align <= 2 && desired_alignment > 2)
17177 rtx label = ix86_expand_aligntest (destptr, 2, false);
17178 srcmem = change_address (srcmem, HImode, srcptr);
17179 destmem = change_address (destmem, HImode, destptr);
17180 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17181 ix86_adjust_counter (count, 2);
17182 emit_label (label);
17183 LABEL_NUSES (label) = 1;
17185 if (align <= 4 && desired_alignment > 4)
17187 rtx label = ix86_expand_aligntest (destptr, 4, false);
17188 srcmem = change_address (srcmem, SImode, srcptr);
17189 destmem = change_address (destmem, SImode, destptr);
17190 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17191 ix86_adjust_counter (count, 4);
17192 emit_label (label);
17193 LABEL_NUSES (label) = 1;
17195 gcc_assert (desired_alignment <= 8);
17198 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17199 ALIGN_BYTES is how many bytes need to be copied. */
17201 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17202 int desired_align, int align_bytes)
17205 rtx src_size, dst_size;
17207 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17208 if (src_align_bytes >= 0)
17209 src_align_bytes = desired_align - src_align_bytes;
17210 src_size = MEM_SIZE (src);
17211 dst_size = MEM_SIZE (dst);
17212 if (align_bytes & 1)
17214 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17215 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17217 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17219 if (align_bytes & 2)
17221 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17222 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17223 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17224 set_mem_align (dst, 2 * BITS_PER_UNIT);
17225 if (src_align_bytes >= 0
17226 && (src_align_bytes & 1) == (align_bytes & 1)
17227 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17228 set_mem_align (src, 2 * BITS_PER_UNIT);
17230 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17232 if (align_bytes & 4)
17234 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17235 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17236 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17237 set_mem_align (dst, 4 * BITS_PER_UNIT);
17238 if (src_align_bytes >= 0)
17240 unsigned int src_align = 0;
17241 if ((src_align_bytes & 3) == (align_bytes & 3))
17243 else if ((src_align_bytes & 1) == (align_bytes & 1))
17245 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17246 set_mem_align (src, src_align * BITS_PER_UNIT);
17249 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17251 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17252 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17253 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17254 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17255 if (src_align_bytes >= 0)
17257 unsigned int src_align = 0;
17258 if ((src_align_bytes & 7) == (align_bytes & 7))
17260 else if ((src_align_bytes & 3) == (align_bytes & 3))
17262 else if ((src_align_bytes & 1) == (align_bytes & 1))
17264 if (src_align > (unsigned int) desired_align)
17265 src_align = desired_align;
17266 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17267 set_mem_align (src, src_align * BITS_PER_UNIT);
17270 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17272 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
17277 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
17278 DESIRED_ALIGNMENT. */
17280 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17281 int align, int desired_alignment)
17283 if (align <= 1 && desired_alignment > 1)
17285 rtx label = ix86_expand_aligntest (destptr, 1, false);
17286 destmem = change_address (destmem, QImode, destptr);
17287 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17288 ix86_adjust_counter (count, 1);
17289 emit_label (label);
17290 LABEL_NUSES (label) = 1;
17292 if (align <= 2 && desired_alignment > 2)
17294 rtx label = ix86_expand_aligntest (destptr, 2, false);
17295 destmem = change_address (destmem, HImode, destptr);
17296 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17297 ix86_adjust_counter (count, 2);
17298 emit_label (label);
17299 LABEL_NUSES (label) = 1;
17301 if (align <= 4 && desired_alignment > 4)
17303 rtx label = ix86_expand_aligntest (destptr, 4, false);
17304 destmem = change_address (destmem, SImode, destptr);
17305 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17306 ix86_adjust_counter (count, 4);
17307 emit_label (label);
17308 LABEL_NUSES (label) = 1;
17310 gcc_assert (desired_alignment <= 8);
17313 /* Set enough from DST to align DST known to by aligned by ALIGN to
17314 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
17316 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17317 int desired_align, int align_bytes)
17320 rtx dst_size = MEM_SIZE (dst);
17321 if (align_bytes & 1)
17323 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17325 emit_insn (gen_strset (destreg, dst,
17326 gen_lowpart (QImode, value)));
17328 if (align_bytes & 2)
17330 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17331 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17332 set_mem_align (dst, 2 * BITS_PER_UNIT);
17334 emit_insn (gen_strset (destreg, dst,
17335 gen_lowpart (HImode, value)));
17337 if (align_bytes & 4)
17339 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17340 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17341 set_mem_align (dst, 4 * BITS_PER_UNIT);
17343 emit_insn (gen_strset (destreg, dst,
17344 gen_lowpart (SImode, value)));
17346 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17347 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17348 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17350 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17354 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17355 static enum stringop_alg
17356 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17357 int *dynamic_check)
17359 const struct stringop_algs * algs;
17360 bool optimize_for_speed;
17361 /* Algorithms using the rep prefix want at least edi and ecx;
17362 additionally, memset wants eax and memcpy wants esi. Don't
17363 consider such algorithms if the user has appropriated those
17364 registers for their own purposes. */
17365 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17367 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17369 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17370 || (alg != rep_prefix_1_byte \
17371 && alg != rep_prefix_4_byte \
17372 && alg != rep_prefix_8_byte))
17373 const struct processor_costs *cost;
17375 /* Even if the string operation call is cold, we still might spend a lot
17376 of time processing large blocks. */
17377 if (optimize_function_for_size_p (cfun)
17378 || (optimize_insn_for_size_p ()
17379 && expected_size != -1 && expected_size < 256))
17380 optimize_for_speed = false;
17382 optimize_for_speed = true;
17384 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17386 *dynamic_check = -1;
17388 algs = &cost->memset[TARGET_64BIT != 0];
17390 algs = &cost->memcpy[TARGET_64BIT != 0];
17391 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17392 return stringop_alg;
17393 /* rep; movq or rep; movl is the smallest variant. */
17394 else if (!optimize_for_speed)
17396 if (!count || (count & 3))
17397 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17399 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17401 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17403 else if (expected_size != -1 && expected_size < 4)
17404 return loop_1_byte;
17405 else if (expected_size != -1)
17408 enum stringop_alg alg = libcall;
17409 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17411 /* We get here if the algorithms that were not libcall-based
17412 were rep-prefix based and we are unable to use rep prefixes
17413 based on global register usage. Break out of the loop and
17414 use the heuristic below. */
17415 if (algs->size[i].max == 0)
17417 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17419 enum stringop_alg candidate = algs->size[i].alg;
17421 if (candidate != libcall && ALG_USABLE_P (candidate))
17423 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17424 last non-libcall inline algorithm. */
17425 if (TARGET_INLINE_ALL_STRINGOPS)
17427 /* When the current size is best to be copied by a libcall,
17428 but we are still forced to inline, run the heuristic below
17429 that will pick code for medium sized blocks. */
17430 if (alg != libcall)
17434 else if (ALG_USABLE_P (candidate))
17438 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17440 /* When asked to inline the call anyway, try to pick meaningful choice.
17441 We look for maximal size of block that is faster to copy by hand and
17442 take blocks of at most of that size guessing that average size will
17443 be roughly half of the block.
17445 If this turns out to be bad, we might simply specify the preferred
17446 choice in ix86_costs. */
17447 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17448 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17451 enum stringop_alg alg;
17453 bool any_alg_usable_p = true;
17455 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17457 enum stringop_alg candidate = algs->size[i].alg;
17458 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17460 if (candidate != libcall && candidate
17461 && ALG_USABLE_P (candidate))
17462 max = algs->size[i].max;
17464 /* If there aren't any usable algorithms, then recursing on
17465 smaller sizes isn't going to find anything. Just return the
17466 simple byte-at-a-time copy loop. */
17467 if (!any_alg_usable_p)
17469 /* Pick something reasonable. */
17470 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17471 *dynamic_check = 128;
17472 return loop_1_byte;
17476 alg = decide_alg (count, max / 2, memset, dynamic_check);
17477 gcc_assert (*dynamic_check == -1);
17478 gcc_assert (alg != libcall);
17479 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17480 *dynamic_check = max;
17483 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17484 #undef ALG_USABLE_P
17487 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17488 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
17490 decide_alignment (int align,
17491 enum stringop_alg alg,
17494 int desired_align = 0;
17498 gcc_unreachable ();
17500 case unrolled_loop:
17501 desired_align = GET_MODE_SIZE (Pmode);
17503 case rep_prefix_8_byte:
17506 case rep_prefix_4_byte:
17507 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17508 copying whole cacheline at once. */
17509 if (TARGET_PENTIUMPRO)
17514 case rep_prefix_1_byte:
17515 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17516 copying whole cacheline at once. */
17517 if (TARGET_PENTIUMPRO)
17531 if (desired_align < align)
17532 desired_align = align;
17533 if (expected_size != -1 && expected_size < 4)
17534 desired_align = align;
17535 return desired_align;
17538 /* Return the smallest power of 2 greater than VAL. */
17540 smallest_pow2_greater_than (int val)
17548 /* Expand string move (memcpy) operation. Use i386 string operations when
17549 profitable. expand_setmem contains similar code. The code depends upon
17550 architecture, block size and alignment, but always has the same
17553 1) Prologue guard: Conditional that jumps up to epilogues for small
17554 blocks that can be handled by epilogue alone. This is faster but
17555 also needed for correctness, since prologue assume the block is larger
17556 than the desired alignment.
17558 Optional dynamic check for size and libcall for large
17559 blocks is emitted here too, with -minline-stringops-dynamically.
17561 2) Prologue: copy first few bytes in order to get destination aligned
17562 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17563 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17564 We emit either a jump tree on power of two sized blocks, or a byte loop.
17566 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17567 with specified algorithm.
17569 4) Epilogue: code copying tail of the block that is too small to be
17570 handled by main body (or up to size guarded by prologue guard). */
/* Emit RTL for a memcpy-style block copy of COUNT_EXP bytes from SRC to
   DST.  ALIGN_EXP is the known common alignment of the operands;
   EXPECTED_ALIGN_EXP and EXPECTED_SIZE_EXP are optimizer hints.  The
   four-step structure (prologue guard, alignment prologue, main loop,
   epilogue) is described in the comment preceding this function.  */
17573 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17574 rtx expected_align_exp, rtx expected_size_exp)
17580 rtx jump_around_label = NULL;
17581 HOST_WIDE_INT align = 1;
17582 unsigned HOST_WIDE_INT count = 0;
17583 HOST_WIDE_INT expected_size = -1;
17584 int size_needed = 0, epilogue_size_needed;
17585 int desired_align = 0, align_bytes = 0;
17586 enum stringop_alg alg;
17588 bool need_zero_guard = false;
/* Harvest compile-time constants from the alignment/size hint operands.  */
17590 if (CONST_INT_P (align_exp))
17591 align = INTVAL (align_exp);
17592 /* i386 can do misaligned access on reasonably increased cost. */
17593 if (CONST_INT_P (expected_align_exp)
17594 && INTVAL (expected_align_exp) > align)
17595 align = INTVAL (expected_align_exp);
17596 /* ALIGN is the minimum of destination and source alignment, but we care here
17597 just about destination alignment. */
17598 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
17599 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
17601 if (CONST_INT_P (count_exp))
17602 count = expected_size = INTVAL (count_exp);
17603 if (CONST_INT_P (expected_size_exp) && count == 0)
17604 expected_size = INTVAL (expected_size_exp);
17606 /* Make sure we don't need to care about overflow later on. */
17607 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17610 /* Step 0: Decide on preferred algorithm, desired alignment and
17611 size of chunks to be copied by main loop. */
17613 alg = decide_alg (count, expected_size, false, &dynamic_check);
17614 desired_align = decide_alignment (align, alg, expected_size);
17616 if (!TARGET_ALIGN_STRINGOPS)
17617 align = desired_align;
17619 if (alg == libcall)
17621 gcc_assert (alg != no_stringop);
/* Materialize the count and both block addresses in registers.  */
17623 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17624 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17625 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED is the number of bytes the main body handles per
   iteration for the chosen algorithm (see the case labels below).  */
17630 gcc_unreachable ();
17632 need_zero_guard = true;
17633 size_needed = GET_MODE_SIZE (Pmode);
17635 case unrolled_loop:
17636 need_zero_guard = true;
17637 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17639 case rep_prefix_8_byte:
17642 case rep_prefix_4_byte:
17645 case rep_prefix_1_byte:
17649 need_zero_guard = true;
17654 epilogue_size_needed = size_needed;
17656 /* Step 1: Prologue guard. */
17658 /* Alignment code needs count to be in register. */
17659 if (CONST_INT_P (count_exp) && desired_align > align)
17661 if (INTVAL (count_exp) > desired_align
17662 && INTVAL (count_exp) > size_needed)
17665 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
17666 if (align_bytes <= 0)
17669 align_bytes = desired_align - align_bytes;
17671 if (align_bytes == 0)
17672 count_exp = force_reg (counter_mode (count_exp), count_exp);
17674 gcc_assert (desired_align >= 1 && align >= 1);
17676 /* Ensure that alignment prologue won't copy past end of block. */
17677 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17679 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17680 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17681 Make sure it is power of 2. */
17682 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17686 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
17688 /* If main algorithm works on QImode, no epilogue is needed.
17689 For small sizes just don't align anything. */
17690 if (size_needed == 1)
17691 desired_align = align;
/* Run-time guard: blocks smaller than EPILOGUE_SIZE_NEEDED branch to
   LABEL and are handled entirely by the epilogue.  */
17698 label = gen_label_rtx ();
17699 emit_cmp_and_jump_insns (count_exp,
17700 GEN_INT (epilogue_size_needed),
17701 LTU, 0, counter_mode (count_exp), 1, label);
17702 if (expected_size == -1 || expected_size < epilogue_size_needed)
17703 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17705 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17709 /* Emit code to decide on runtime whether library call or inline should be
17711 if (dynamic_check != -1)
17713 if (CONST_INT_P (count_exp))
/* Constant count: resolve the libcall-vs-inline choice at compile time.  */
17715 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17717 emit_block_move_via_libcall (dst, src, count_exp, false);
17718 count_exp = const0_rtx;
/* Variable count: branch to the inline ("hot") path for small blocks,
   libcall otherwise, then rejoin at JUMP_AROUND_LABEL.  */
17724 rtx hot_label = gen_label_rtx ();
17725 jump_around_label = gen_label_rtx ();
17726 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17727 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17728 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17729 emit_block_move_via_libcall (dst, src, count_exp, false);
17730 emit_jump (jump_around_label);
17731 emit_label (hot_label);
17735 /* Step 2: Alignment prologue. */
17737 if (desired_align > align)
17739 if (align_bytes == 0)
17741 /* Except for the first move in epilogue, we no longer know
17742 constant offset in aliasing info. It doesn't seem worth
17743 the pain to maintain it for the first move, so throw away
17745 src = change_address (src, BLKmode, srcreg);
17746 dst = change_address (dst, BLKmode, destreg);
17747 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
17752 /* If we know how many bytes need to be stored before dst is
17753 sufficiently aligned, maintain aliasing info accurately. */
17754 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
17755 desired_align, align_bytes);
17756 count_exp = plus_constant (count_exp, -align_bytes);
17757 count -= align_bytes;
17759 if (need_zero_guard
17760 && (count < (unsigned HOST_WIDE_INT) size_needed
17761 || (align_bytes == 0
17762 && count < ((unsigned HOST_WIDE_INT) size_needed
17763 + desired_align - align))))
17765 /* It is possible that we copied enough so the main loop will not
17767 gcc_assert (size_needed > 1);
17768 if (label == NULL_RTX)
17769 label = gen_label_rtx ();
17770 emit_cmp_and_jump_insns (count_exp,
17771 GEN_INT (size_needed),
17772 LTU, 0, counter_mode (count_exp), 1, label);
17773 if (expected_size == -1
17774 || expected_size < (desired_align - align) / 2 + size_needed)
17775 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17777 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17780 if (label && size_needed == 1)
17782 emit_label (label);
17783 LABEL_NUSES (label) = 1;
17785 epilogue_size_needed = 1;
17787 else if (label == NULL_RTX)
17788 epilogue_size_needed = size_needed;
17790 /* Step 3: Main loop. */
17796 gcc_unreachable ();
17798 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17799 count_exp, QImode, 1, expected_size);
17802 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17803 count_exp, Pmode, 1, expected_size);
17805 case unrolled_loop:
17806 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
17807 registers for 4 temporaries anyway. */
17808 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17809 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
17812 case rep_prefix_8_byte:
17813 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17816 case rep_prefix_4_byte:
17817 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17820 case rep_prefix_1_byte:
17821 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17825 /* Adjust properly the offset of src and dest memory for aliasing. */
17826 if (CONST_INT_P (count_exp))
17828 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17829 (count / size_needed) * size_needed);
17830 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17831 (count / size_needed) * size_needed);
17835 src = change_address (src, BLKmode, srcreg);
17836 dst = change_address (dst, BLKmode, destreg);
17839 /* Step 4: Epilogue to copy the remaining bytes. */
17843 /* When the main loop is done, COUNT_EXP might hold original count,
17844 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17845 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17846 bytes. Compensate if needed. */
17848 if (size_needed < epilogue_size_needed)
17851 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17852 GEN_INT (size_needed - 1), count_exp, 1,
17854 if (tmp != count_exp)
17855 emit_move_insn (count_exp, tmp);
17857 emit_label (label);
17858 LABEL_NUSES (label) = 1;
17861 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17862 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
17863 epilogue_size_needed);
17864 if (jump_around_label)
17865 emit_label (jump_around_label);
17869 /* Helper function for memcpy. For QImode value 0xXY produce
17870 0xXYXYXYXY of width specified by MODE. This is essentially
17871 a * 0x10101010, but we can do slightly better than
17872 synth_mult by unwinding the sequence by hand on CPUs with
17875 promote_duplicated_reg (enum machine_mode mode, rtx val)
17877 enum machine_mode valmode = GET_MODE (val);
17879 int nops = mode == DImode ? 3 : 2;
17881 gcc_assert (mode == SImode || mode == DImode);
/* Zero duplicates to zero; just load it.  */
17882 if (val == const0_rtx)
17883 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: compute the replicated pattern at compile time.  */
17884 if (CONST_INT_P (val))
17886 HOST_WIDE_INT v = INTVAL (val) & 255;
17890 if (mode == DImode)
17891 v |= (v << 16) << 16;
17892 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
17895 if (valmode == VOIDmode)
17897 if (valmode != QImode)
17898 val = gen_lowpart (QImode, val);
17899 if (mode == QImode)
/* Use a multiply by the replicated-ones constant when the cost model
   says it beats the shift/or sequence below.  */
17901 if (!TARGET_PARTIAL_REG_STALL)
17903 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
17904 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
17905 <= (ix86_cost->shift_const + ix86_cost->add) * nops
17906 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
17908 rtx reg = convert_modes (mode, QImode, val, true);
17909 tmp = promote_duplicated_reg (mode, const1_rtx);
17910 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Otherwise build the pattern with doubling shift/or steps:
   8, 16 and (for DImode) 32 bits.  */
17915 rtx reg = convert_modes (mode, QImode, val, true);
17917 if (!TARGET_PARTIAL_REG_STALL)
17918 if (mode == SImode)
17919 emit_insn (gen_movsi_insv_1 (reg, reg));
17921 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
17924 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
17925 NULL, 1, OPTAB_DIRECT);
17927 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17929 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
17930 NULL, 1, OPTAB_DIRECT);
17931 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17932 if (mode == SImode)
17934 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
17935 NULL, 1, OPTAB_DIRECT);
17936 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17941 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
17942 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
17943 alignment from ALIGN to DESIRED_ALIGN. */
17945 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
/* Pick the widest mode either the main loop chunk or the alignment
   prologue will store in: DImode, then SImode, then HImode, else the
   value is used unmodified (byte stores only).  */
17950 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
17951 promoted_val = promote_duplicated_reg (DImode, val);
17952 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
17953 promoted_val = promote_duplicated_reg (SImode, val);
17954 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
17955 promoted_val = promote_duplicated_reg (HImode, val);
17957 promoted_val = val;
17959 return promoted_val;
17962 /* Expand string clear operation (bzero). Use i386 string operations when
17963 profitable. See expand_movmem comment for explanation of individual
17964 steps performed. */
/* Emit RTL for a memset-style fill of COUNT_EXP bytes at DST with byte
   value VAL_EXP.  Mirrors ix86_expand_movmem's step structure, with the
   extra twist that VAL must first be "promoted" (replicated) to the
   store width used by the chosen algorithm.  */
17966 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
17967 rtx expected_align_exp, rtx expected_size_exp)
17972 rtx jump_around_label = NULL;
17973 HOST_WIDE_INT align = 1;
17974 unsigned HOST_WIDE_INT count = 0;
17975 HOST_WIDE_INT expected_size = -1;
17976 int size_needed = 0, epilogue_size_needed;
17977 int desired_align = 0, align_bytes = 0;
17978 enum stringop_alg alg;
17979 rtx promoted_val = NULL;
17980 bool force_loopy_epilogue = false;
17982 bool need_zero_guard = false;
17984 if (CONST_INT_P (align_exp))
17985 align = INTVAL (align_exp);
17986 /* i386 can do misaligned access on reasonably increased cost. */
17987 if (CONST_INT_P (expected_align_exp)
17988 && INTVAL (expected_align_exp) > align)
17989 align = INTVAL (expected_align_exp);
17990 if (CONST_INT_P (count_exp))
17991 count = expected_size = INTVAL (count_exp);
17992 if (CONST_INT_P (expected_size_exp) && count == 0)
17993 expected_size = INTVAL (expected_size_exp);
17995 /* Make sure we don't need to care about overflow later on. */
17996 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17999 /* Step 0: Decide on preferred algorithm, desired alignment and
18000 size of chunks to be copied by main loop. */
18002 alg = decide_alg (count, expected_size, true, &dynamic_check);
18003 desired_align = decide_alignment (align, alg, expected_size);
18005 if (!TARGET_ALIGN_STRINGOPS)
18006 align = desired_align;
18008 if (alg == libcall)
18010 gcc_assert (alg != no_stringop);
18012 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18013 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* SIZE_NEEDED is the per-iteration store width for the chosen
   algorithm (see the case labels below).  */
18018 gcc_unreachable ();
18020 need_zero_guard = true;
18021 size_needed = GET_MODE_SIZE (Pmode);
18023 case unrolled_loop:
18024 need_zero_guard = true;
18025 size_needed = GET_MODE_SIZE (Pmode) * 4;
18027 case rep_prefix_8_byte:
18030 case rep_prefix_4_byte:
18033 case rep_prefix_1_byte:
18037 need_zero_guard = true;
18041 epilogue_size_needed = size_needed;
18043 /* Step 1: Prologue guard. */
18045 /* Alignment code needs count to be in register. */
18046 if (CONST_INT_P (count_exp) && desired_align > align)
18048 if (INTVAL (count_exp) > desired_align
18049 && INTVAL (count_exp) > size_needed)
18052 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18053 if (align_bytes <= 0)
18056 align_bytes = desired_align - align_bytes;
18058 if (align_bytes == 0)
18060 enum machine_mode mode = SImode;
18061 if (TARGET_64BIT && (count & ~0xffffffff))
18063 count_exp = force_reg (mode, count_exp);
18066 /* Do the cheap promotion to allow better CSE across the
18067 main loop and epilogue (ie one load of the big constant in the
18068 front of all code. */
18069 if (CONST_INT_P (val_exp))
18070 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18071 desired_align, align);
18072 /* Ensure that alignment prologue won't copy past end of block. */
18073 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18075 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18076 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18077 Make sure it is power of 2. */
18078 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18080 /* To improve performance of small blocks, we jump around the VAL
18081 promoting mode. This means that if the promoted VAL is not constant,
18082 we might not use it in the epilogue and have to use byte
18084 if (epilogue_size_needed > 2 && !promoted_val)
18085 force_loopy_epilogue = true;
18088 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18090 /* If main algorithm works on QImode, no epilogue is needed.
18091 For small sizes just don't align anything. */
18092 if (size_needed == 1)
18093 desired_align = align;
/* Run-time guard: small blocks branch to LABEL and are handled by the
   epilogue alone.  */
18100 label = gen_label_rtx ();
18101 emit_cmp_and_jump_insns (count_exp,
18102 GEN_INT (epilogue_size_needed),
18103 LTU, 0, counter_mode (count_exp), 1, label);
18104 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18105 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18107 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Run-time libcall-vs-inline decision, as in ix86_expand_movmem.  */
18110 if (dynamic_check != -1)
18112 rtx hot_label = gen_label_rtx ();
18113 jump_around_label = gen_label_rtx ();
18114 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18115 LEU, 0, counter_mode (count_exp), 1, hot_label);
18116 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18117 set_storage_via_libcall (dst, count_exp, val_exp, false);
18118 emit_jump (jump_around_label);
18119 emit_label (hot_label);
18122 /* Step 2: Alignment prologue. */
18124 /* Do the expensive promotion once we branched off the small blocks. */
18126 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18127 desired_align, align);
18128 gcc_assert (desired_align >= 1 && align >= 1);
18130 if (desired_align > align)
18132 if (align_bytes == 0)
18134 /* Except for the first move in epilogue, we no longer know
18135 constant offset in aliasing info. It doesn't seem worth
18136 the pain to maintain it for the first move, so throw away
18138 dst = change_address (dst, BLKmode, destreg);
18139 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18144 /* If we know how many bytes need to be stored before dst is
18145 sufficiently aligned, maintain aliasing info accurately. */
18146 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18147 desired_align, align_bytes);
18148 count_exp = plus_constant (count_exp, -align_bytes);
18149 count -= align_bytes;
18151 if (need_zero_guard
18152 && (count < (unsigned HOST_WIDE_INT) size_needed
18153 || (align_bytes == 0
18154 && count < ((unsigned HOST_WIDE_INT) size_needed
18155 + desired_align - align))))
18157 /* It is possible that we copied enough so the main loop will not
18159 gcc_assert (size_needed > 1);
18160 if (label == NULL_RTX)
18161 label = gen_label_rtx ();
18162 emit_cmp_and_jump_insns (count_exp,
18163 GEN_INT (size_needed),
18164 LTU, 0, counter_mode (count_exp), 1, label);
18165 if (expected_size == -1
18166 || expected_size < (desired_align - align) / 2 + size_needed)
18167 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18169 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18172 if (label && size_needed == 1)
18174 emit_label (label);
18175 LABEL_NUSES (label) = 1;
/* Byte-only path: the unpromoted value suffices.  */
18177 promoted_val = val_exp;
18178 epilogue_size_needed = 1;
18180 else if (label == NULL_RTX)
18181 epilogue_size_needed = size_needed;
18183 /* Step 3: Main loop. */
18189 gcc_unreachable ();
18191 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18192 count_exp, QImode, 1, expected_size);
18195 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18196 count_exp, Pmode, 1, expected_size);
18198 case unrolled_loop:
18199 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18200 count_exp, Pmode, 4, expected_size);
18202 case rep_prefix_8_byte:
18203 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18206 case rep_prefix_4_byte:
18207 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18210 case rep_prefix_1_byte:
18211 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18215 /* Adjust properly the offset of src and dest memory for aliasing. */
18216 if (CONST_INT_P (count_exp))
18217 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18218 (count / size_needed) * size_needed);
18220 dst = change_address (dst, BLKmode, destreg);
18222 /* Step 4: Epilogue to copy the remaining bytes. */
18226 /* When the main loop is done, COUNT_EXP might hold original count,
18227 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18228 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18229 bytes. Compensate if needed. */
18231 if (size_needed < epilogue_size_needed)
18234 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18235 GEN_INT (size_needed - 1), count_exp, 1,
18237 if (tmp != count_exp)
18238 emit_move_insn (count_exp, tmp);
18240 emit_label (label);
18241 LABEL_NUSES (label) = 1;
18244 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* Non-constant VAL may have skipped promotion; fall back to a byte loop.  */
18246 if (force_loopy_epilogue)
18247 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18248 epilogue_size_needed);
18250 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18251 epilogue_size_needed);
18253 if (jump_around_label)
18254 emit_label (jump_around_label);
18258 /* Expand the appropriate insns for doing strlen if not just doing
18261 out = result, initialized with the start address
18262 align_rtx = alignment of the address.
18263 scratch = scratch register, initialized with the startaddress when
18264 not aligned, otherwise undefined
18266 This is just the body. It needs the initializations mentioned above and
18267 some address computing at the end. These things are done in i386.md. */
18270 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18274 rtx align_2_label = NULL_RTX;
18275 rtx align_3_label = NULL_RTX;
18276 rtx align_4_label = gen_label_rtx ();
18277 rtx end_0_label = gen_label_rtx ();
18279 rtx tmpreg = gen_reg_rtx (SImode);
18280 rtx scratch = gen_reg_rtx (SImode);
18284 if (CONST_INT_P (align_rtx))
18285 align = INTVAL (align_rtx);
18287 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18289 /* Is there a known alignment and is it less than 4? */
18292 rtx scratch1 = gen_reg_rtx (Pmode);
18293 emit_move_insn (scratch1, out);
18294 /* Is there a known alignment and is it not 2? */
18297 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18298 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18300 /* Leave just the 3 lower bits. */
18301 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18302 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already aligned, 2 -> two bytes to
   check, 3 -> one byte, otherwise fall through (one or three).  */
18304 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18305 Pmode, 1, align_4_label);
18306 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18307 Pmode, 1, align_2_label);
18308 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18309 Pmode, 1, align_3_label);
18313 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18314 check if is aligned to 4 - byte. */
18316 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18317 NULL_RTX, 0, OPTAB_WIDEN);
18319 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18320 Pmode, 1, align_4_label);
18323 mem = change_address (src, QImode, out);
18325 /* Now compare the bytes. */
18327 /* Compare the first n unaligned byte on a byte per byte basis. */
18328 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18329 QImode, 1, end_0_label);
18331 /* Increment the address. */
18332 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18334 /* Not needed with an alignment of 2 */
18337 emit_label (align_2_label);
18339 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18342 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18344 emit_label (align_3_label);
18347 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18350 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18353 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18354 align this loop. It gives only huge programs, but does not help to
18356 emit_label (align_4_label);
18358 mem = change_address (src, SImode, out);
18359 emit_move_insn (scratch, mem);
18360 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18362 /* This formula yields a nonzero result iff one of the bytes is zero.
18363 This saves three branches inside loop and many cycles. */
/* Classic zero-byte detector: (x - 0x01010101) & ~x & 0x80808080.  */
18365 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18366 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18367 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18368 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18369 gen_int_mode (0x80808080, SImode)));
18370 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found; narrow it down to the exact byte using
   conditional moves (this arm) ...  */
18375 rtx reg = gen_reg_rtx (SImode);
18376 rtx reg2 = gen_reg_rtx (Pmode);
18377 emit_move_insn (reg, tmpreg);
18378 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18380 /* If zero is not in the first two bytes, move two bytes forward. */
18381 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18382 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18383 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18384 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18385 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18388 /* Emit lea manually to avoid clobbering of flags. */
18389 emit_insn (gen_rtx_SET (SImode, reg2,
18390 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18392 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18393 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18394 emit_insn (gen_rtx_SET (VOIDmode, out,
18395 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* ... or, on targets without/preferring no cmov, with an explicit
   branch around the two-byte adjustment.  */
18402 rtx end_2_label = gen_label_rtx ();
18403 /* Is zero in the first two bytes? */
18405 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18406 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18407 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18408 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18409 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18411 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18412 JUMP_LABEL (tmp) = end_2_label;
18414 /* Not in the first two. Move two bytes forward. */
18415 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18416 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18418 emit_label (end_2_label);
18422 /* Avoid branch in fixing the byte. */
18423 tmpreg = gen_lowpart (QImode, tmpreg);
18424 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18425 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18426 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18428 emit_label (end_0_label);
18431 /* Expand strlen. */
/* OUT receives the length of the string at SRC.  EOSCHAR is the
   terminator (const0_rtx for plain strlen); ALIGN is the known
   alignment of SRC.  Two strategies: the unrolled SImode scanner
   (ix86_expand_strlensi_unroll_1) or a repne-scasb sequence using
   eax/ecx/edi.  */
18434 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18436 rtx addr, scratch1, scratch2, scratch3, scratch4;
18438 /* The generic case of strlen expander is long. Avoid its
18439 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
18441 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18442 && !TARGET_INLINE_ALL_STRINGOPS
18443 && !optimize_insn_for_size_p ()
18444 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18447 addr = force_reg (Pmode, XEXP (src, 0));
18448 scratch1 = gen_reg_rtx (Pmode);
18450 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18451 && !optimize_insn_for_size_p ())
18453 /* Well it seems that some optimizer does not combine a call like
18454 foo(strlen(bar), strlen(bar));
18455 when the move and the subtraction is done here. It does calculate
18456 the length just once when these instructions are done inside of
18457 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
18458 often used and I use one fewer register for the lifetime of
18459 output_strlen_unroll() this is better. */
18461 emit_move_insn (out, addr);
18463 ix86_expand_strlensi_unroll_1 (out, src, align);
18465 /* strlensi_unroll_1 returns the address of the zero at the end of
18466 the string, like memchr(), so compute the length by subtracting
18467 the start address. */
18468 emit_insn ((*ix86_gen_sub3) (out, out, addr));
18474 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18475 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
/* repne-scasb path: scan for EOSCHAR, then derive the length from the
   (negated) remaining-count register.  */
18478 scratch2 = gen_reg_rtx (Pmode);
18479 scratch3 = gen_reg_rtx (Pmode);
18480 scratch4 = force_reg (Pmode, constm1_rtx);
18482 emit_move_insn (scratch3, addr);
18483 eoschar = force_reg (QImode, eoschar);
18485 src = replace_equiv_address_nv (src, scratch3);
18487 /* If .md starts supporting :P, this can be done in .md. */
18488 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18489 scratch4), UNSPEC_SCAS);
18490 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
18491 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18492 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18497 /* For given symbol (function) construct code to compute address of its PLT
18498 entry in large x86-64 PIC model. */
/* Returns a fresh Pmode register holding pic_offset_table + PLTOFF(symbol).
   Only valid in the CM_LARGE_PIC code model (asserted below).  */
18500 construct_plt_address (rtx symbol)
18502 rtx tmp = gen_reg_rtx (Pmode);
18503 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18505 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18506 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18508 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18509 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call to FNADDR (a MEM) returning RETVAL (or NULL for void),
   with CALLARG1 as the CALL rtx's argument-size operand.  POP, when not
   const0_rtx, is how much the callee pops.  SIBCALL selects sibling-call
   handling.  NOTE(review): CALLARG2's declaration is not visible in this
   listing; from its uses below it encodes the callee ABI (-2 => MS_ABI)
   and, when non-negative on 64-bit, the SSE-register count loaded into
   AL for varargs — confirm against the full source.  */
18514 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18516 rtx pop, int sibcall)
18518 rtx use = NULL, call;
18519 enum calling_abi function_call_abi;
18521 if (callarg2 && INTVAL (callarg2) == -2)
18522 function_call_abi = MS_ABI;
18524 function_call_abi = SYSV_ABI;
18525 if (pop == const0_rtx)
18527 gcc_assert (!TARGET_64BIT || !pop);
18529 if (TARGET_MACHO && !TARGET_64BIT)
18532 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18533 fnaddr = machopic_indirect_call_target (fnaddr);
18538 /* Static functions and indirect calls don't need the pic register. */
18539 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18540 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18541 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18542 use_reg (&use, pic_offset_table_rtx);
18545 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18547 rtx al = gen_rtx_REG (QImode, AX_REG);
18548 emit_move_insn (al, callarg2);
18549 use_reg (&use, al);
/* Large-PIC calls to non-local symbols go through the PLT address
   computed by construct_plt_address.  */
18552 if (ix86_cmodel == CM_LARGE_PIC
18553 && GET_CODE (fnaddr) == MEM
18554 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18555 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18556 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18557 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18559 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18560 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls require the target in a call-clobbered register that
   is not used for argument passing; r11 is used for that.  */
18562 if (sibcall && TARGET_64BIT
18563 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18566 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18567 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18568 emit_move_insn (fnaddr, addr);
18569 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18572 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18574 call = gen_rtx_SET (VOIDmode, retval, call);
/* Attach the callee's stack-pop as a parallel stack-pointer update.  */
18577 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18578 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18579 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18580 gcc_assert (ix86_cfun_abi () != MS_ABI || function_call_abi != SYSV_ABI);
18582 /* We need to represent that SI and DI registers are clobbered
18584 if (ix86_cfun_abi () == MS_ABI && function_call_abi == SYSV_ABI)
18586 static int clobbered_registers[] = {27, 28, 45, 46, 47, 48, 49, 50, 51,
18587 52, SI_REG, DI_REG};
18589 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
18590 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
18591 UNSPEC_MS_TO_SYSV_CALL);
18595 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
18596 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
18599 (SSE_REGNO_P (clobbered_registers[i])
18601 clobbered_registers[i]));
18603 call = gen_rtx_PARALLEL (VOIDmode,
18604 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
18608 call = emit_call_insn (call);
18610 CALL_INSN_FUNCTION_USAGE (call) = use;
18614 /* Clear stack slot assignments remembered from previous functions.
18615 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and return a zeroed, GC-managed machine_function with the
   i386-specific defaults set.  */
18618 static struct machine_function *
18619 ix86_init_machine_status (void)
18621 struct machine_function *f;
18623 f = GGC_CNEW (struct machine_function);
18624 f->use_fast_prologue_epilogue_nregs = -1;
18625 f->tls_descriptor_call_expanded_p = 0;
18626 f->call_abi = DEFAULT_ABI;
18631 /* Return a MEM corresponding to a stack slot with mode MODE.
18632 Allocate a new slot if necessary.
18634 The RTL for a function can have several slots available: N is
18635 which slot to use. */
18638 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18640 struct stack_local_entry *s;
18642 gcc_assert (n < MAX_386_STACK_LOCALS);
18644 /* Virtual slot is valid only before vregs are instantiated. */
18645 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse an existing slot with the same (mode, n) key if one was
   already allocated for this function.  */
18647 for (s = ix86_stack_locals; s; s = s->next)
18648 if (s->mode == mode && s->n == n)
18649 return copy_rtx (s->rtl);
/* Otherwise allocate a new entry and push it on the per-function list.  */
18651 s = (struct stack_local_entry *)
18652 ggc_alloc (sizeof (struct stack_local_entry));
18655 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18657 s->next = ix86_stack_locals;
18658 ix86_stack_locals = s;
18662 /* Construct the SYMBOL_REF for the tls_get_addr function. */
18664 static GTY(()) rtx ix86_tls_symbol;
/* Lazily create and cache the SYMBOL_REF; GNU TLS uses the
   triple-underscore variant of the name.  */
18666 ix86_tls_get_addr (void)
18669 if (!ix86_tls_symbol)
18671 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18672 (TARGET_ANY_GNU_TLS
18674 ? "___tls_get_addr"
18675 : "__tls_get_addr");
18678 return ix86_tls_symbol;
18681 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
18683 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily create and cache the symbol, marking it global-dynamic TLS.  */
18685 ix86_tls_module_base (void)
18688 if (!ix86_tls_module_base_symbol)
18690 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18691 "_TLS_MODULE_BASE_");
18692 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18693 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18696 return ix86_tls_module_base_symbol;
18699 /* Calculate the length of the memory address in the instruction
18700 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* I.e. returns the extra bytes (SIB and/or displacement) beyond the
   basic ModRM byte, following the x86 addressing-mode encoding rules.  */
18703 memory_address_length (rtx addr)
18705 struct ix86_address parts;
18706 rtx base, index, disp;
/* Auto-modified addresses are handled before decomposition.  */
18710 if (GET_CODE (addr) == PRE_DEC
18711 || GET_CODE (addr) == POST_INC
18712 || GET_CODE (addr) == PRE_MODIFY
18713 || GET_CODE (addr) == POST_MODIFY)
18716 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the base/index compare equal to the hard frame
   registers below.  */
18719 if (parts.base && GET_CODE (parts.base) == SUBREG)
18720 parts.base = SUBREG_REG (parts.base);
18721 if (parts.index && GET_CODE (parts.index) == SUBREG)
18722 parts.index = SUBREG_REG (parts.index);
18725 index = parts.index;
18730 - esp as the base always wants an index,
18731 - ebp as the base always wants a displacement. */
18733 /* Register Indirect. */
18734 if (base && !index && !disp)
18736 /* esp (for its index) and ebp (for its displacement) need
18737 the two-byte modrm form. */
18738 if (addr == stack_pointer_rtx
18739 || addr == arg_pointer_rtx
18740 || addr == frame_pointer_rtx
18741 || addr == hard_frame_pointer_rtx)
18745 /* Direct Addressing. */
18746 else if (disp && !base && !index)
18751 /* Find the length of the displacement constant. */
/* satisfies_constraint_K => the displacement fits in a signed byte.  */
18754 if (base && satisfies_constraint_K (disp))
18759 /* ebp always wants a displacement. */
18760 else if (base == hard_frame_pointer_rtx)
18763 /* An index requires the two-byte modrm form.... */
18765 /* ...like esp, which always wants an index. */
18766 || base == stack_pointer_rtx
18767 || base == arg_pointer_rtx
18768 || base == frame_pointer_rtx
18775 /* Compute default value for "length_immediate" attribute. When SHORTFORM
18776 is set, expect that insn have 8bit immediate alternative. */
/* Scans INSN's operands (newest first) for a constant; if SHORTFORM is
   set and the constant satisfies constraint K (signed 8-bit), the short
   encoding applies.  Otherwise the immediate width is derived from the
   insn's mode attribute; an unrecognized mode aborts compilation.  */
18778 ix86_attr_length_immediate_default (rtx insn, int shortform)
18782 extract_insn_cached (insn);
18783 for (i = recog_data.n_operands - 1; i >= 0; --i)
18784 if (CONSTANT_P (recog_data.operand[i]))
18787 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
18791 switch (get_attr_mode (insn))
18802 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
18807 fatal_insn ("unknown insn mode", insn);
18813 /* Compute default value for "length_address" attribute. */
/* LEA encodes its address in SET_SRC of its (possibly PARALLEL-wrapped)
   pattern; for every other insn the first MEM operand found supplies the
   address whose encoded length is returned.  */
18815 ix86_attr_length_address_default (rtx insn)
18819 if (get_attr_type (insn) == TYPE_LEA)
18821 rtx set = PATTERN (insn);
18823 if (GET_CODE (set) == PARALLEL)
18824 set = XVECEXP (set, 0, 0)
18826 gcc_assert (GET_CODE (set) == SET);
18828 return memory_address_length (SET_SRC (set));
18831 extract_insn_cached (insn);
18832 for (i = recog_data.n_operands - 1; i >= 0; --i)
18833 if (MEM_P (recog_data.operand[i]))
18835 return memory_address_length (XEXP (recog_data.operand[i], 0));
18841 /* Compute default value for "length_vex" attribute. It includes
18842 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Decides between the 2-byte and 3-byte VEX prefix encodings.  The
   3-byte form is forced by: a non-0F opcode map, the VEX.W bit, a DImode
   register operand (needs REX.W), or a memory operand mentioning an
   extended register (needs REX.X/REX.B).  Outside those cases the 2-byte
   form suffices.  */
18845 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
18850 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18851 byte VEX prefix. */
18852 if (!has_0f_opcode || has_vex_w)
18855 /* We can always use 2 byte VEX prefix in 32bit. */
18859 extract_insn_cached (insn);
18861 for (i = recog_data.n_operands - 1; i >= 0; --i)
18862 if (REG_P (recog_data.operand[i]))
18864 /* REX.W bit uses 3 byte VEX prefix. */
18865 if (GET_MODE (recog_data.operand[i]) == DImode)
18870 /* REX.X or REX.B bits use 3 byte VEX prefix. */
18871 if (MEM_P (recog_data.operand[i])
18872 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
18879 /* Return the maximum number of instructions a cpu can issue. */
/* Scheduler hook: per-processor issue width, selected by a switch over
   the active tuning target (the per-case return values fall outside this
   view).  */
18882 ix86_issue_rate (void)
18886 case PROCESSOR_PENTIUM:
18890 case PROCESSOR_PENTIUMPRO:
18891 case PROCESSOR_PENTIUM4:
18892 case PROCESSOR_ATHLON:
18894 case PROCESSOR_AMDFAM10:
18895 case PROCESSOR_NOCONA:
18896 case PROCESSOR_GENERIC32:
18897 case PROCESSOR_GENERIC64:
18900 case PROCESSOR_CORE2:
18908 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
18909 by DEP_INSN and nothing set by DEP_INSN. */
18912 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18916 /* Simplify the test for uninteresting insns. */
18917 if (insn_type != TYPE_SETCC
18918 && insn_type != TYPE_ICMOV
18919 && insn_type != TYPE_FCMOV
18920 && insn_type != TYPE_IBR)
18923 if ((set = single_set (dep_insn)) != 0)
18925 set = SET_DEST (set);
18928 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
18929 && XVECLEN (PATTERN (dep_insn), 0) == 2
18930 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
18931 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
18933 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
18934 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
18939 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
18942 /* This test is true if the dependent insn reads the flags but
18943 not any other potentially set register. */
18944 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
18947 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
18953 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
18954 address with operands set by DEP_INSN. */
/* For an LEA, the "address" is the SET_SRC of its (possibly
   PARALLEL-wrapped) pattern; otherwise it is the address of the first
   MEM operand found.  The result is whether DEP_INSN modifies any
   register appearing in that address (an address-generation interlock).  */
18957 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18961 if (insn_type == TYPE_LEA
18964 addr = PATTERN (insn);
18966 if (GET_CODE (addr) == PARALLEL)
18967 addr = XVECEXP (addr, 0, 0);
18969 gcc_assert (GET_CODE (addr) == SET);
18971 addr = SET_SRC (addr);
18976 extract_insn_cached (insn);
18977 for (i = recog_data.n_operands - 1; i >= 0; --i)
18978 if (MEM_P (recog_data.operand[i]))
18980 addr = XEXP (recog_data.operand[i], 0);
18987 return modified_in_p (addr, dep_insn);
/* Scheduler hook TARGET_SCHED_ADJUST_COST: tweak the latency COST of the
   dependency LINK between DEP_INSN (producer) and INSN (consumer),
   returning the adjusted cost.  Per-processor cases model AGI stalls,
   flag forwarding, expensive int->fp conversions, and load latency
   hiding by the reorder buffer.  */
18991 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
18993 enum attr_type insn_type, dep_insn_type;
18994 enum attr_memory memory;
18996 int dep_insn_code_number;
18998 /* Anti and output dependencies have zero cost on all CPUs. */
18999 if (REG_NOTE_KIND (link) != 0)
19002 dep_insn_code_number = recog_memoized (dep_insn);
19004 /* If we can't recognize the insns, we can't really do anything. */
19005 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19008 insn_type = get_attr_type (insn);
19009 dep_insn_type = get_attr_type (dep_insn);
19013 case PROCESSOR_PENTIUM:
19014 /* Address Generation Interlock adds a cycle of latency. */
19015 if (ix86_agi_dependent (insn, dep_insn, insn_type))
19018 /* ??? Compares pair with jump/setcc. */
19019 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19022 /* Floating point stores require value to be ready one cycle earlier. */
19023 if (insn_type == TYPE_FMOV
19024 && get_attr_memory (insn) == MEMORY_STORE
19025 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19029 case PROCESSOR_PENTIUMPRO:
19030 memory = get_attr_memory (insn);
19032 /* INT->FP conversion is expensive. */
19033 if (get_attr_fp_int_src (dep_insn))
19036 /* There is one cycle extra latency between an FP op and a store. */
19037 if (insn_type == TYPE_FMOV
19038 && (set = single_set (dep_insn)) != NULL_RTX
19039 && (set2 = single_set (insn)) != NULL_RTX
19040 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19041 && MEM_P (SET_DEST (set2)))
19044 /* Show ability of reorder buffer to hide latency of load by executing
19045 in parallel with previous instruction in case
19046 previous instruction is not needed to compute the address. */
19047 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19048 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19050 /* Claim moves to take one cycle, as core can issue one load
19051 at time and the next load can start cycle later. */
19052 if (dep_insn_type == TYPE_IMOV
19053 || dep_insn_type == TYPE_FMOV)
19061 memory = get_attr_memory (insn);
19063 /* The esp dependency is resolved before the instruction is really
19065 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19066 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19069 /* INT->FP conversion is expensive. */
19070 if (get_attr_fp_int_src (dep_insn))
19073 /* Show ability of reorder buffer to hide latency of load by executing
19074 in parallel with previous instruction in case
19075 previous instruction is not needed to compute the address. */
19076 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19077 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19079 /* Claim moves to take one cycle, as core can issue one load
19080 at time and the next load can start cycle later. */
19081 if (dep_insn_type == TYPE_IMOV
19082 || dep_insn_type == TYPE_FMOV)
19091 case PROCESSOR_ATHLON:
19093 case PROCESSOR_AMDFAM10:
19094 case PROCESSOR_GENERIC32:
19095 case PROCESSOR_GENERIC64:
19096 memory = get_attr_memory (insn);
19098 /* Show ability of reorder buffer to hide latency of load by executing
19099 in parallel with previous instruction in case
19100 previous instruction is not needed to compute the address. */
19101 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19102 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19104 enum attr_unit unit = get_attr_unit (insn);
19107 /* Because of the difference between the length of integer and
19108 floating unit pipeline preparation stages, the memory operands
19109 for floating point are cheaper.
19111 ??? For Athlon the difference is most probably 2. */
19112 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19115 loadcost = TARGET_ATHLON ? 2 : 0;
19117 if (cost >= loadcost)
19130 /* How many alternative schedules to try. This should be as wide as the
19131 scheduling freedom in the DFA, but no wider. Making this value too
19132 large results extra work for the scheduler. */
/* Scheduler hook: lookahead depth, selected per tuning target (the
   returned values fall outside this view).  */
19135 ia32_multipass_dfa_lookahead (void)
19139 case PROCESSOR_PENTIUM:
19142 case PROCESSOR_PENTIUMPRO:
19152 /* Compute the alignment given to a constant that is being placed in memory.
19153 EXP is the constant and ALIGN is the alignment that the object would
19155 The value of this function is used instead of that alignment to align
/* DFmode constants are raised to 64-bit alignment and 128-bit modes to
   128-bit; long string constants (>= 31 chars) get word alignment unless
   optimizing for size.  */
19159 ix86_constant_alignment (tree exp, int align)
19161 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19162 || TREE_CODE (exp) == INTEGER_CST)
19164 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19166 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19169 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19170 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19171 return BITS_PER_WORD;
19176 /* Compute the alignment for a static variable.
19177 TYPE is the data type, and ALIGN is the alignment that
19178 the object would ordinarily have. The value of this function is used
19179 instead of that alignment to align the object. */
/* Raises alignment (never lowers it): large aggregates up to max_align,
   128-bit-or-larger aggregates to 128 bits (x86-64 ABI), and element/
   field modes DFmode to 64 bits or 128-bit modes to 128 bits.  */
19182 ix86_data_alignment (tree type, int align)
/* When optimizing for size cap at word alignment; otherwise at 256 bits
   (or less if the object format allows less).  */
19184 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19186 if (AGGREGATE_TYPE_P (type)
19187 && TYPE_SIZE (type)
19188 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
/* TREE_INT_CST_HIGH nonzero means the size exceeds the low word, hence
   certainly >= max_align.  */
19189 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19190 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19191 && align < max_align)
19194 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19195 to 16byte boundary. */
19198 if (AGGREGATE_TYPE_P (type)
19199 && TYPE_SIZE (type)
19200 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19201 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19202 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Below: pick alignment from the element type (arrays), the component
   mode (complex), the first field (records/unions), or the type's own
   mode (scalars and vectors).  */
19206 if (TREE_CODE (type) == ARRAY_TYPE)
19208 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19210 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19213 else if (TREE_CODE (type) == COMPLEX_TYPE)
19216 if (TYPE_MODE (type) == DCmode && align < 64)
19218 if ((TYPE_MODE (type) == XCmode
19219 || TYPE_MODE (type) == TCmode) && align < 128)
19222 else if ((TREE_CODE (type) == RECORD_TYPE
19223 || TREE_CODE (type) == UNION_TYPE
19224 || TREE_CODE (type) == QUAL_UNION_TYPE)
19225 && TYPE_FIELDS (type))
19227 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19229 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19232 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19233 || TREE_CODE (type) == INTEGER_TYPE)
19235 if (TYPE_MODE (type) == DFmode && align < 64)
19237 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19244 /* Compute the alignment for a local variable or a stack slot. TYPE is
19245 the data type, MODE is the widest mode available and ALIGN is the
19246 alignment that the object would ordinarily have. The value of this
19247 macro is used instead of that alignment to align the object. */
/* Mirrors ix86_data_alignment but for stack objects: raises alignment
   based on the type's (or its element's/field's) mode, and honors the
   x86-64 ABI 16-byte rule for large aggregates.  */
19250 ix86_local_alignment (tree type, enum machine_mode mode,
19251 unsigned int align)
19253 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19254 register in MODE. We will return the largest alignment of XF
19258 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19259 align = GET_MODE_ALIGNMENT (DFmode);
19263 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19264 to 16byte boundary. */
19267 if (AGGREGATE_TYPE_P (type)
19268 && TYPE_SIZE (type)
19269 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
/* Size is in bits here per TYPE_SIZE; the 16-vs-128 constant differs
   from ix86_data_alignment's test — NOTE(review): looks intentional for
   stack slots, but worth confirming against upstream history.  */
19270 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19271 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19274 if (TREE_CODE (type) == ARRAY_TYPE)
19276 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19278 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19281 else if (TREE_CODE (type) == COMPLEX_TYPE)
19283 if (TYPE_MODE (type) == DCmode && align < 64)
19285 if ((TYPE_MODE (type) == XCmode
19286 || TYPE_MODE (type) == TCmode) && align < 128)
19289 else if ((TREE_CODE (type) == RECORD_TYPE
19290 || TREE_CODE (type) == UNION_TYPE
19291 || TREE_CODE (type) == QUAL_UNION_TYPE)
19292 && TYPE_FIELDS (type))
19294 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19296 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19299 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19300 || TREE_CODE (type) == INTEGER_TYPE)
19303 if (TYPE_MODE (type) == DFmode && align < 64)
19305 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19311 /* Emit RTL insns to initialize the variable parts of a trampoline.
19312 FNADDR is an RTX for the address of the function's pure code.
19313 CXT is an RTX for the static chain value for the function. */
/* 32-bit layout: "mov $cxt, %ecx; jmp rel32" (opcodes 0xb9 and 0xe9,
   pc-relative displacement computed against tramp+10).  64-bit layout:
   load FNADDR into r11 (movl when it is a zero-extendable 32-bit
   immediate, else movabs), movabs CXT into r10, then "jmp *%r11"
   (0xff /4 via 0x49 0xff 0xe3).  */
19315 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19319 /* Compute offset from the end of the jmp to the target function. */
19320 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19321 plus_constant (tramp, 10),
19322 NULL_RTX, 1, OPTAB_DIRECT);
19323 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19324 gen_int_mode (0xb9, QImode));
19325 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19326 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19327 gen_int_mode (0xe9, QImode));
19328 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19333 /* Try to load address using shorter movl instead of movabs.
19334 We may want to support movq for kernel mode, but kernel does not use
19335 trampolines at the moment. */
19336 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19338 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* 0x41 0xbb = REX.B + movl imm32 into r11d (zero-extends to r11).  */
19339 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19340 gen_int_mode (0xbb41, HImode));
19341 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19342 gen_lowpart (SImode, fnaddr));
/* 0x49 0xbb = REX.W+B movabs imm64 into r11.  */
19347 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19348 gen_int_mode (0xbb49, HImode));
19349 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19353 /* Load static chain using movabs to r10. */
19354 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19355 gen_int_mode (0xba49, HImode));
19356 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19359 /* Jump to the r11 */
19360 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19361 gen_int_mode (0xff49, HImode));
19362 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19363 gen_int_mode (0xe3, QImode));
/* Sanity check that we did not overrun the buffer the target reserved.  */
19365 gcc_assert (offset <= TRAMPOLINE_SIZE);
19368 #ifdef ENABLE_EXECUTE_STACK
/* Some systems need the stack page marked executable at runtime.  */
19369 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19370 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19374 /* Codes for all the SSE/MMX builtins. */
19377 IX86_BUILTIN_ADDPS,
19378 IX86_BUILTIN_ADDSS,
19379 IX86_BUILTIN_DIVPS,
19380 IX86_BUILTIN_DIVSS,
19381 IX86_BUILTIN_MULPS,
19382 IX86_BUILTIN_MULSS,
19383 IX86_BUILTIN_SUBPS,
19384 IX86_BUILTIN_SUBSS,
19386 IX86_BUILTIN_CMPEQPS,
19387 IX86_BUILTIN_CMPLTPS,
19388 IX86_BUILTIN_CMPLEPS,
19389 IX86_BUILTIN_CMPGTPS,
19390 IX86_BUILTIN_CMPGEPS,
19391 IX86_BUILTIN_CMPNEQPS,
19392 IX86_BUILTIN_CMPNLTPS,
19393 IX86_BUILTIN_CMPNLEPS,
19394 IX86_BUILTIN_CMPNGTPS,
19395 IX86_BUILTIN_CMPNGEPS,
19396 IX86_BUILTIN_CMPORDPS,
19397 IX86_BUILTIN_CMPUNORDPS,
19398 IX86_BUILTIN_CMPEQSS,
19399 IX86_BUILTIN_CMPLTSS,
19400 IX86_BUILTIN_CMPLESS,
19401 IX86_BUILTIN_CMPNEQSS,
19402 IX86_BUILTIN_CMPNLTSS,
19403 IX86_BUILTIN_CMPNLESS,
19404 IX86_BUILTIN_CMPNGTSS,
19405 IX86_BUILTIN_CMPNGESS,
19406 IX86_BUILTIN_CMPORDSS,
19407 IX86_BUILTIN_CMPUNORDSS,
19409 IX86_BUILTIN_COMIEQSS,
19410 IX86_BUILTIN_COMILTSS,
19411 IX86_BUILTIN_COMILESS,
19412 IX86_BUILTIN_COMIGTSS,
19413 IX86_BUILTIN_COMIGESS,
19414 IX86_BUILTIN_COMINEQSS,
19415 IX86_BUILTIN_UCOMIEQSS,
19416 IX86_BUILTIN_UCOMILTSS,
19417 IX86_BUILTIN_UCOMILESS,
19418 IX86_BUILTIN_UCOMIGTSS,
19419 IX86_BUILTIN_UCOMIGESS,
19420 IX86_BUILTIN_UCOMINEQSS,
19422 IX86_BUILTIN_CVTPI2PS,
19423 IX86_BUILTIN_CVTPS2PI,
19424 IX86_BUILTIN_CVTSI2SS,
19425 IX86_BUILTIN_CVTSI642SS,
19426 IX86_BUILTIN_CVTSS2SI,
19427 IX86_BUILTIN_CVTSS2SI64,
19428 IX86_BUILTIN_CVTTPS2PI,
19429 IX86_BUILTIN_CVTTSS2SI,
19430 IX86_BUILTIN_CVTTSS2SI64,
19432 IX86_BUILTIN_MAXPS,
19433 IX86_BUILTIN_MAXSS,
19434 IX86_BUILTIN_MINPS,
19435 IX86_BUILTIN_MINSS,
19437 IX86_BUILTIN_LOADUPS,
19438 IX86_BUILTIN_STOREUPS,
19439 IX86_BUILTIN_MOVSS,
19441 IX86_BUILTIN_MOVHLPS,
19442 IX86_BUILTIN_MOVLHPS,
19443 IX86_BUILTIN_LOADHPS,
19444 IX86_BUILTIN_LOADLPS,
19445 IX86_BUILTIN_STOREHPS,
19446 IX86_BUILTIN_STORELPS,
19448 IX86_BUILTIN_MASKMOVQ,
19449 IX86_BUILTIN_MOVMSKPS,
19450 IX86_BUILTIN_PMOVMSKB,
19452 IX86_BUILTIN_MOVNTPS,
19453 IX86_BUILTIN_MOVNTQ,
19455 IX86_BUILTIN_LOADDQU,
19456 IX86_BUILTIN_STOREDQU,
19458 IX86_BUILTIN_PACKSSWB,
19459 IX86_BUILTIN_PACKSSDW,
19460 IX86_BUILTIN_PACKUSWB,
19462 IX86_BUILTIN_PADDB,
19463 IX86_BUILTIN_PADDW,
19464 IX86_BUILTIN_PADDD,
19465 IX86_BUILTIN_PADDQ,
19466 IX86_BUILTIN_PADDSB,
19467 IX86_BUILTIN_PADDSW,
19468 IX86_BUILTIN_PADDUSB,
19469 IX86_BUILTIN_PADDUSW,
19470 IX86_BUILTIN_PSUBB,
19471 IX86_BUILTIN_PSUBW,
19472 IX86_BUILTIN_PSUBD,
19473 IX86_BUILTIN_PSUBQ,
19474 IX86_BUILTIN_PSUBSB,
19475 IX86_BUILTIN_PSUBSW,
19476 IX86_BUILTIN_PSUBUSB,
19477 IX86_BUILTIN_PSUBUSW,
19480 IX86_BUILTIN_PANDN,
19484 IX86_BUILTIN_PAVGB,
19485 IX86_BUILTIN_PAVGW,
19487 IX86_BUILTIN_PCMPEQB,
19488 IX86_BUILTIN_PCMPEQW,
19489 IX86_BUILTIN_PCMPEQD,
19490 IX86_BUILTIN_PCMPGTB,
19491 IX86_BUILTIN_PCMPGTW,
19492 IX86_BUILTIN_PCMPGTD,
19494 IX86_BUILTIN_PMADDWD,
19496 IX86_BUILTIN_PMAXSW,
19497 IX86_BUILTIN_PMAXUB,
19498 IX86_BUILTIN_PMINSW,
19499 IX86_BUILTIN_PMINUB,
19501 IX86_BUILTIN_PMULHUW,
19502 IX86_BUILTIN_PMULHW,
19503 IX86_BUILTIN_PMULLW,
19505 IX86_BUILTIN_PSADBW,
19506 IX86_BUILTIN_PSHUFW,
19508 IX86_BUILTIN_PSLLW,
19509 IX86_BUILTIN_PSLLD,
19510 IX86_BUILTIN_PSLLQ,
19511 IX86_BUILTIN_PSRAW,
19512 IX86_BUILTIN_PSRAD,
19513 IX86_BUILTIN_PSRLW,
19514 IX86_BUILTIN_PSRLD,
19515 IX86_BUILTIN_PSRLQ,
19516 IX86_BUILTIN_PSLLWI,
19517 IX86_BUILTIN_PSLLDI,
19518 IX86_BUILTIN_PSLLQI,
19519 IX86_BUILTIN_PSRAWI,
19520 IX86_BUILTIN_PSRADI,
19521 IX86_BUILTIN_PSRLWI,
19522 IX86_BUILTIN_PSRLDI,
19523 IX86_BUILTIN_PSRLQI,
19525 IX86_BUILTIN_PUNPCKHBW,
19526 IX86_BUILTIN_PUNPCKHWD,
19527 IX86_BUILTIN_PUNPCKHDQ,
19528 IX86_BUILTIN_PUNPCKLBW,
19529 IX86_BUILTIN_PUNPCKLWD,
19530 IX86_BUILTIN_PUNPCKLDQ,
19532 IX86_BUILTIN_SHUFPS,
19534 IX86_BUILTIN_RCPPS,
19535 IX86_BUILTIN_RCPSS,
19536 IX86_BUILTIN_RSQRTPS,
19537 IX86_BUILTIN_RSQRTPS_NR,
19538 IX86_BUILTIN_RSQRTSS,
19539 IX86_BUILTIN_RSQRTF,
19540 IX86_BUILTIN_SQRTPS,
19541 IX86_BUILTIN_SQRTPS_NR,
19542 IX86_BUILTIN_SQRTSS,
19544 IX86_BUILTIN_UNPCKHPS,
19545 IX86_BUILTIN_UNPCKLPS,
19547 IX86_BUILTIN_ANDPS,
19548 IX86_BUILTIN_ANDNPS,
19550 IX86_BUILTIN_XORPS,
19553 IX86_BUILTIN_LDMXCSR,
19554 IX86_BUILTIN_STMXCSR,
19555 IX86_BUILTIN_SFENCE,
19557 /* 3DNow! Original */
19558 IX86_BUILTIN_FEMMS,
19559 IX86_BUILTIN_PAVGUSB,
19560 IX86_BUILTIN_PF2ID,
19561 IX86_BUILTIN_PFACC,
19562 IX86_BUILTIN_PFADD,
19563 IX86_BUILTIN_PFCMPEQ,
19564 IX86_BUILTIN_PFCMPGE,
19565 IX86_BUILTIN_PFCMPGT,
19566 IX86_BUILTIN_PFMAX,
19567 IX86_BUILTIN_PFMIN,
19568 IX86_BUILTIN_PFMUL,
19569 IX86_BUILTIN_PFRCP,
19570 IX86_BUILTIN_PFRCPIT1,
19571 IX86_BUILTIN_PFRCPIT2,
19572 IX86_BUILTIN_PFRSQIT1,
19573 IX86_BUILTIN_PFRSQRT,
19574 IX86_BUILTIN_PFSUB,
19575 IX86_BUILTIN_PFSUBR,
19576 IX86_BUILTIN_PI2FD,
19577 IX86_BUILTIN_PMULHRW,
19579 /* 3DNow! Athlon Extensions */
19580 IX86_BUILTIN_PF2IW,
19581 IX86_BUILTIN_PFNACC,
19582 IX86_BUILTIN_PFPNACC,
19583 IX86_BUILTIN_PI2FW,
19584 IX86_BUILTIN_PSWAPDSI,
19585 IX86_BUILTIN_PSWAPDSF,
19588 IX86_BUILTIN_ADDPD,
19589 IX86_BUILTIN_ADDSD,
19590 IX86_BUILTIN_DIVPD,
19591 IX86_BUILTIN_DIVSD,
19592 IX86_BUILTIN_MULPD,
19593 IX86_BUILTIN_MULSD,
19594 IX86_BUILTIN_SUBPD,
19595 IX86_BUILTIN_SUBSD,
19597 IX86_BUILTIN_CMPEQPD,
19598 IX86_BUILTIN_CMPLTPD,
19599 IX86_BUILTIN_CMPLEPD,
19600 IX86_BUILTIN_CMPGTPD,
19601 IX86_BUILTIN_CMPGEPD,
19602 IX86_BUILTIN_CMPNEQPD,
19603 IX86_BUILTIN_CMPNLTPD,
19604 IX86_BUILTIN_CMPNLEPD,
19605 IX86_BUILTIN_CMPNGTPD,
19606 IX86_BUILTIN_CMPNGEPD,
19607 IX86_BUILTIN_CMPORDPD,
19608 IX86_BUILTIN_CMPUNORDPD,
19609 IX86_BUILTIN_CMPEQSD,
19610 IX86_BUILTIN_CMPLTSD,
19611 IX86_BUILTIN_CMPLESD,
19612 IX86_BUILTIN_CMPNEQSD,
19613 IX86_BUILTIN_CMPNLTSD,
19614 IX86_BUILTIN_CMPNLESD,
19615 IX86_BUILTIN_CMPORDSD,
19616 IX86_BUILTIN_CMPUNORDSD,
19618 IX86_BUILTIN_COMIEQSD,
19619 IX86_BUILTIN_COMILTSD,
19620 IX86_BUILTIN_COMILESD,
19621 IX86_BUILTIN_COMIGTSD,
19622 IX86_BUILTIN_COMIGESD,
19623 IX86_BUILTIN_COMINEQSD,
19624 IX86_BUILTIN_UCOMIEQSD,
19625 IX86_BUILTIN_UCOMILTSD,
19626 IX86_BUILTIN_UCOMILESD,
19627 IX86_BUILTIN_UCOMIGTSD,
19628 IX86_BUILTIN_UCOMIGESD,
19629 IX86_BUILTIN_UCOMINEQSD,
19631 IX86_BUILTIN_MAXPD,
19632 IX86_BUILTIN_MAXSD,
19633 IX86_BUILTIN_MINPD,
19634 IX86_BUILTIN_MINSD,
19636 IX86_BUILTIN_ANDPD,
19637 IX86_BUILTIN_ANDNPD,
19639 IX86_BUILTIN_XORPD,
19641 IX86_BUILTIN_SQRTPD,
19642 IX86_BUILTIN_SQRTSD,
19644 IX86_BUILTIN_UNPCKHPD,
19645 IX86_BUILTIN_UNPCKLPD,
19647 IX86_BUILTIN_SHUFPD,
19649 IX86_BUILTIN_LOADUPD,
19650 IX86_BUILTIN_STOREUPD,
19651 IX86_BUILTIN_MOVSD,
19653 IX86_BUILTIN_LOADHPD,
19654 IX86_BUILTIN_LOADLPD,
19656 IX86_BUILTIN_CVTDQ2PD,
19657 IX86_BUILTIN_CVTDQ2PS,
19659 IX86_BUILTIN_CVTPD2DQ,
19660 IX86_BUILTIN_CVTPD2PI,
19661 IX86_BUILTIN_CVTPD2PS,
19662 IX86_BUILTIN_CVTTPD2DQ,
19663 IX86_BUILTIN_CVTTPD2PI,
19665 IX86_BUILTIN_CVTPI2PD,
19666 IX86_BUILTIN_CVTSI2SD,
19667 IX86_BUILTIN_CVTSI642SD,
19669 IX86_BUILTIN_CVTSD2SI,
19670 IX86_BUILTIN_CVTSD2SI64,
19671 IX86_BUILTIN_CVTSD2SS,
19672 IX86_BUILTIN_CVTSS2SD,
19673 IX86_BUILTIN_CVTTSD2SI,
19674 IX86_BUILTIN_CVTTSD2SI64,
19676 IX86_BUILTIN_CVTPS2DQ,
19677 IX86_BUILTIN_CVTPS2PD,
19678 IX86_BUILTIN_CVTTPS2DQ,
19680 IX86_BUILTIN_MOVNTI,
19681 IX86_BUILTIN_MOVNTPD,
19682 IX86_BUILTIN_MOVNTDQ,
19684 IX86_BUILTIN_MOVQ128,
19687 IX86_BUILTIN_MASKMOVDQU,
19688 IX86_BUILTIN_MOVMSKPD,
19689 IX86_BUILTIN_PMOVMSKB128,
19691 IX86_BUILTIN_PACKSSWB128,
19692 IX86_BUILTIN_PACKSSDW128,
19693 IX86_BUILTIN_PACKUSWB128,
19695 IX86_BUILTIN_PADDB128,
19696 IX86_BUILTIN_PADDW128,
19697 IX86_BUILTIN_PADDD128,
19698 IX86_BUILTIN_PADDQ128,
19699 IX86_BUILTIN_PADDSB128,
19700 IX86_BUILTIN_PADDSW128,
19701 IX86_BUILTIN_PADDUSB128,
19702 IX86_BUILTIN_PADDUSW128,
19703 IX86_BUILTIN_PSUBB128,
19704 IX86_BUILTIN_PSUBW128,
19705 IX86_BUILTIN_PSUBD128,
19706 IX86_BUILTIN_PSUBQ128,
19707 IX86_BUILTIN_PSUBSB128,
19708 IX86_BUILTIN_PSUBSW128,
19709 IX86_BUILTIN_PSUBUSB128,
19710 IX86_BUILTIN_PSUBUSW128,
19712 IX86_BUILTIN_PAND128,
19713 IX86_BUILTIN_PANDN128,
19714 IX86_BUILTIN_POR128,
19715 IX86_BUILTIN_PXOR128,
19717 IX86_BUILTIN_PAVGB128,
19718 IX86_BUILTIN_PAVGW128,
19720 IX86_BUILTIN_PCMPEQB128,
19721 IX86_BUILTIN_PCMPEQW128,
19722 IX86_BUILTIN_PCMPEQD128,
19723 IX86_BUILTIN_PCMPGTB128,
19724 IX86_BUILTIN_PCMPGTW128,
19725 IX86_BUILTIN_PCMPGTD128,
19727 IX86_BUILTIN_PMADDWD128,
19729 IX86_BUILTIN_PMAXSW128,
19730 IX86_BUILTIN_PMAXUB128,
19731 IX86_BUILTIN_PMINSW128,
19732 IX86_BUILTIN_PMINUB128,
19734 IX86_BUILTIN_PMULUDQ,
19735 IX86_BUILTIN_PMULUDQ128,
19736 IX86_BUILTIN_PMULHUW128,
19737 IX86_BUILTIN_PMULHW128,
19738 IX86_BUILTIN_PMULLW128,
19740 IX86_BUILTIN_PSADBW128,
19741 IX86_BUILTIN_PSHUFHW,
19742 IX86_BUILTIN_PSHUFLW,
19743 IX86_BUILTIN_PSHUFD,
19745 IX86_BUILTIN_PSLLDQI128,
19746 IX86_BUILTIN_PSLLWI128,
19747 IX86_BUILTIN_PSLLDI128,
19748 IX86_BUILTIN_PSLLQI128,
19749 IX86_BUILTIN_PSRAWI128,
19750 IX86_BUILTIN_PSRADI128,
19751 IX86_BUILTIN_PSRLDQI128,
19752 IX86_BUILTIN_PSRLWI128,
19753 IX86_BUILTIN_PSRLDI128,
19754 IX86_BUILTIN_PSRLQI128,
19756 IX86_BUILTIN_PSLLDQ128,
19757 IX86_BUILTIN_PSLLW128,
19758 IX86_BUILTIN_PSLLD128,
19759 IX86_BUILTIN_PSLLQ128,
19760 IX86_BUILTIN_PSRAW128,
19761 IX86_BUILTIN_PSRAD128,
19762 IX86_BUILTIN_PSRLW128,
19763 IX86_BUILTIN_PSRLD128,
19764 IX86_BUILTIN_PSRLQ128,
19766 IX86_BUILTIN_PUNPCKHBW128,
19767 IX86_BUILTIN_PUNPCKHWD128,
19768 IX86_BUILTIN_PUNPCKHDQ128,
19769 IX86_BUILTIN_PUNPCKHQDQ128,
19770 IX86_BUILTIN_PUNPCKLBW128,
19771 IX86_BUILTIN_PUNPCKLWD128,
19772 IX86_BUILTIN_PUNPCKLDQ128,
19773 IX86_BUILTIN_PUNPCKLQDQ128,
19775 IX86_BUILTIN_CLFLUSH,
19776 IX86_BUILTIN_MFENCE,
19777 IX86_BUILTIN_LFENCE,
19780 IX86_BUILTIN_ADDSUBPS,
19781 IX86_BUILTIN_HADDPS,
19782 IX86_BUILTIN_HSUBPS,
19783 IX86_BUILTIN_MOVSHDUP,
19784 IX86_BUILTIN_MOVSLDUP,
19785 IX86_BUILTIN_ADDSUBPD,
19786 IX86_BUILTIN_HADDPD,
19787 IX86_BUILTIN_HSUBPD,
19788 IX86_BUILTIN_LDDQU,
19790 IX86_BUILTIN_MONITOR,
19791 IX86_BUILTIN_MWAIT,
19794 IX86_BUILTIN_PHADDW,
19795 IX86_BUILTIN_PHADDD,
19796 IX86_BUILTIN_PHADDSW,
19797 IX86_BUILTIN_PHSUBW,
19798 IX86_BUILTIN_PHSUBD,
19799 IX86_BUILTIN_PHSUBSW,
19800 IX86_BUILTIN_PMADDUBSW,
19801 IX86_BUILTIN_PMULHRSW,
19802 IX86_BUILTIN_PSHUFB,
19803 IX86_BUILTIN_PSIGNB,
19804 IX86_BUILTIN_PSIGNW,
19805 IX86_BUILTIN_PSIGND,
19806 IX86_BUILTIN_PALIGNR,
19807 IX86_BUILTIN_PABSB,
19808 IX86_BUILTIN_PABSW,
19809 IX86_BUILTIN_PABSD,
19811 IX86_BUILTIN_PHADDW128,
19812 IX86_BUILTIN_PHADDD128,
19813 IX86_BUILTIN_PHADDSW128,
19814 IX86_BUILTIN_PHSUBW128,
19815 IX86_BUILTIN_PHSUBD128,
19816 IX86_BUILTIN_PHSUBSW128,
19817 IX86_BUILTIN_PMADDUBSW128,
19818 IX86_BUILTIN_PMULHRSW128,
19819 IX86_BUILTIN_PSHUFB128,
19820 IX86_BUILTIN_PSIGNB128,
19821 IX86_BUILTIN_PSIGNW128,
19822 IX86_BUILTIN_PSIGND128,
19823 IX86_BUILTIN_PALIGNR128,
19824 IX86_BUILTIN_PABSB128,
19825 IX86_BUILTIN_PABSW128,
19826 IX86_BUILTIN_PABSD128,
19828 /* AMDFAM10 - SSE4A New Instructions. */
19829 IX86_BUILTIN_MOVNTSD,
19830 IX86_BUILTIN_MOVNTSS,
19831 IX86_BUILTIN_EXTRQI,
19832 IX86_BUILTIN_EXTRQ,
19833 IX86_BUILTIN_INSERTQI,
19834 IX86_BUILTIN_INSERTQ,
19837 IX86_BUILTIN_BLENDPD,
19838 IX86_BUILTIN_BLENDPS,
19839 IX86_BUILTIN_BLENDVPD,
19840 IX86_BUILTIN_BLENDVPS,
19841 IX86_BUILTIN_PBLENDVB128,
19842 IX86_BUILTIN_PBLENDW128,
19847 IX86_BUILTIN_INSERTPS128,
19849 IX86_BUILTIN_MOVNTDQA,
19850 IX86_BUILTIN_MPSADBW128,
19851 IX86_BUILTIN_PACKUSDW128,
19852 IX86_BUILTIN_PCMPEQQ,
19853 IX86_BUILTIN_PHMINPOSUW128,
19855 IX86_BUILTIN_PMAXSB128,
19856 IX86_BUILTIN_PMAXSD128,
19857 IX86_BUILTIN_PMAXUD128,
19858 IX86_BUILTIN_PMAXUW128,
19860 IX86_BUILTIN_PMINSB128,
19861 IX86_BUILTIN_PMINSD128,
19862 IX86_BUILTIN_PMINUD128,
19863 IX86_BUILTIN_PMINUW128,
19865 IX86_BUILTIN_PMOVSXBW128,
19866 IX86_BUILTIN_PMOVSXBD128,
19867 IX86_BUILTIN_PMOVSXBQ128,
19868 IX86_BUILTIN_PMOVSXWD128,
19869 IX86_BUILTIN_PMOVSXWQ128,
19870 IX86_BUILTIN_PMOVSXDQ128,
19872 IX86_BUILTIN_PMOVZXBW128,
19873 IX86_BUILTIN_PMOVZXBD128,
19874 IX86_BUILTIN_PMOVZXBQ128,
19875 IX86_BUILTIN_PMOVZXWD128,
19876 IX86_BUILTIN_PMOVZXWQ128,
19877 IX86_BUILTIN_PMOVZXDQ128,
19879 IX86_BUILTIN_PMULDQ128,
19880 IX86_BUILTIN_PMULLD128,
19882 IX86_BUILTIN_ROUNDPD,
19883 IX86_BUILTIN_ROUNDPS,
19884 IX86_BUILTIN_ROUNDSD,
19885 IX86_BUILTIN_ROUNDSS,
19887 IX86_BUILTIN_PTESTZ,
19888 IX86_BUILTIN_PTESTC,
19889 IX86_BUILTIN_PTESTNZC,
19891 IX86_BUILTIN_VEC_INIT_V2SI,
19892 IX86_BUILTIN_VEC_INIT_V4HI,
19893 IX86_BUILTIN_VEC_INIT_V8QI,
19894 IX86_BUILTIN_VEC_EXT_V2DF,
19895 IX86_BUILTIN_VEC_EXT_V2DI,
19896 IX86_BUILTIN_VEC_EXT_V4SF,
19897 IX86_BUILTIN_VEC_EXT_V4SI,
19898 IX86_BUILTIN_VEC_EXT_V8HI,
19899 IX86_BUILTIN_VEC_EXT_V2SI,
19900 IX86_BUILTIN_VEC_EXT_V4HI,
19901 IX86_BUILTIN_VEC_EXT_V16QI,
19902 IX86_BUILTIN_VEC_SET_V2DI,
19903 IX86_BUILTIN_VEC_SET_V4SF,
19904 IX86_BUILTIN_VEC_SET_V4SI,
19905 IX86_BUILTIN_VEC_SET_V8HI,
19906 IX86_BUILTIN_VEC_SET_V4HI,
19907 IX86_BUILTIN_VEC_SET_V16QI,
19909 IX86_BUILTIN_VEC_PACK_SFIX,
19912 IX86_BUILTIN_CRC32QI,
19913 IX86_BUILTIN_CRC32HI,
19914 IX86_BUILTIN_CRC32SI,
19915 IX86_BUILTIN_CRC32DI,
19917 IX86_BUILTIN_PCMPESTRI128,
19918 IX86_BUILTIN_PCMPESTRM128,
19919 IX86_BUILTIN_PCMPESTRA128,
19920 IX86_BUILTIN_PCMPESTRC128,
19921 IX86_BUILTIN_PCMPESTRO128,
19922 IX86_BUILTIN_PCMPESTRS128,
19923 IX86_BUILTIN_PCMPESTRZ128,
19924 IX86_BUILTIN_PCMPISTRI128,
19925 IX86_BUILTIN_PCMPISTRM128,
19926 IX86_BUILTIN_PCMPISTRA128,
19927 IX86_BUILTIN_PCMPISTRC128,
19928 IX86_BUILTIN_PCMPISTRO128,
19929 IX86_BUILTIN_PCMPISTRS128,
19930 IX86_BUILTIN_PCMPISTRZ128,
19932 IX86_BUILTIN_PCMPGTQ,
19934 /* AES instructions */
19935 IX86_BUILTIN_AESENC128,
19936 IX86_BUILTIN_AESENCLAST128,
19937 IX86_BUILTIN_AESDEC128,
19938 IX86_BUILTIN_AESDECLAST128,
19939 IX86_BUILTIN_AESIMC128,
19940 IX86_BUILTIN_AESKEYGENASSIST128,
19942 /* PCLMUL instruction */
19943 IX86_BUILTIN_PCLMULQDQ128,
19946 IX86_BUILTIN_ADDPD256,
19947 IX86_BUILTIN_ADDPS256,
19948 IX86_BUILTIN_ADDSUBPD256,
19949 IX86_BUILTIN_ADDSUBPS256,
19950 IX86_BUILTIN_ANDPD256,
19951 IX86_BUILTIN_ANDPS256,
19952 IX86_BUILTIN_ANDNPD256,
19953 IX86_BUILTIN_ANDNPS256,
19954 IX86_BUILTIN_BLENDPD256,
19955 IX86_BUILTIN_BLENDPS256,
19956 IX86_BUILTIN_BLENDVPD256,
19957 IX86_BUILTIN_BLENDVPS256,
19958 IX86_BUILTIN_DIVPD256,
19959 IX86_BUILTIN_DIVPS256,
19960 IX86_BUILTIN_DPPS256,
19961 IX86_BUILTIN_HADDPD256,
19962 IX86_BUILTIN_HADDPS256,
19963 IX86_BUILTIN_HSUBPD256,
19964 IX86_BUILTIN_HSUBPS256,
19965 IX86_BUILTIN_MAXPD256,
19966 IX86_BUILTIN_MAXPS256,
19967 IX86_BUILTIN_MINPD256,
19968 IX86_BUILTIN_MINPS256,
19969 IX86_BUILTIN_MULPD256,
19970 IX86_BUILTIN_MULPS256,
19971 IX86_BUILTIN_ORPD256,
19972 IX86_BUILTIN_ORPS256,
19973 IX86_BUILTIN_SHUFPD256,
19974 IX86_BUILTIN_SHUFPS256,
19975 IX86_BUILTIN_SUBPD256,
19976 IX86_BUILTIN_SUBPS256,
19977 IX86_BUILTIN_XORPD256,
19978 IX86_BUILTIN_XORPS256,
19979 IX86_BUILTIN_CMPSD,
19980 IX86_BUILTIN_CMPSS,
19981 IX86_BUILTIN_CMPPD,
19982 IX86_BUILTIN_CMPPS,
19983 IX86_BUILTIN_CMPPD256,
19984 IX86_BUILTIN_CMPPS256,
19985 IX86_BUILTIN_CVTDQ2PD256,
19986 IX86_BUILTIN_CVTDQ2PS256,
19987 IX86_BUILTIN_CVTPD2PS256,
19988 IX86_BUILTIN_CVTPS2DQ256,
19989 IX86_BUILTIN_CVTPS2PD256,
19990 IX86_BUILTIN_CVTTPD2DQ256,
19991 IX86_BUILTIN_CVTPD2DQ256,
19992 IX86_BUILTIN_CVTTPS2DQ256,
19993 IX86_BUILTIN_EXTRACTF128PD256,
19994 IX86_BUILTIN_EXTRACTF128PS256,
19995 IX86_BUILTIN_EXTRACTF128SI256,
19996 IX86_BUILTIN_VZEROALL,
19997 IX86_BUILTIN_VZEROUPPER,
19998 IX86_BUILTIN_VZEROUPPER_REX64,
19999 IX86_BUILTIN_VPERMILVARPD,
20000 IX86_BUILTIN_VPERMILVARPS,
20001 IX86_BUILTIN_VPERMILVARPD256,
20002 IX86_BUILTIN_VPERMILVARPS256,
20003 IX86_BUILTIN_VPERMILPD,
20004 IX86_BUILTIN_VPERMILPS,
20005 IX86_BUILTIN_VPERMILPD256,
20006 IX86_BUILTIN_VPERMILPS256,
20007 IX86_BUILTIN_VPERM2F128PD256,
20008 IX86_BUILTIN_VPERM2F128PS256,
20009 IX86_BUILTIN_VPERM2F128SI256,
20010 IX86_BUILTIN_VBROADCASTSS,
20011 IX86_BUILTIN_VBROADCASTSD256,
20012 IX86_BUILTIN_VBROADCASTSS256,
20013 IX86_BUILTIN_VBROADCASTPD256,
20014 IX86_BUILTIN_VBROADCASTPS256,
20015 IX86_BUILTIN_VINSERTF128PD256,
20016 IX86_BUILTIN_VINSERTF128PS256,
20017 IX86_BUILTIN_VINSERTF128SI256,
20018 IX86_BUILTIN_LOADUPD256,
20019 IX86_BUILTIN_LOADUPS256,
20020 IX86_BUILTIN_STOREUPD256,
20021 IX86_BUILTIN_STOREUPS256,
20022 IX86_BUILTIN_LDDQU256,
20023 IX86_BUILTIN_MOVNTDQ256,
20024 IX86_BUILTIN_MOVNTPD256,
20025 IX86_BUILTIN_MOVNTPS256,
20026 IX86_BUILTIN_LOADDQU256,
20027 IX86_BUILTIN_STOREDQU256,
20028 IX86_BUILTIN_MASKLOADPD,
20029 IX86_BUILTIN_MASKLOADPS,
20030 IX86_BUILTIN_MASKSTOREPD,
20031 IX86_BUILTIN_MASKSTOREPS,
20032 IX86_BUILTIN_MASKLOADPD256,
20033 IX86_BUILTIN_MASKLOADPS256,
20034 IX86_BUILTIN_MASKSTOREPD256,
20035 IX86_BUILTIN_MASKSTOREPS256,
20036 IX86_BUILTIN_MOVSHDUP256,
20037 IX86_BUILTIN_MOVSLDUP256,
20038 IX86_BUILTIN_MOVDDUP256,
20040 IX86_BUILTIN_SQRTPD256,
20041 IX86_BUILTIN_SQRTPS256,
20042 IX86_BUILTIN_SQRTPS_NR256,
20043 IX86_BUILTIN_RSQRTPS256,
20044 IX86_BUILTIN_RSQRTPS_NR256,
20046 IX86_BUILTIN_RCPPS256,
20048 IX86_BUILTIN_ROUNDPD256,
20049 IX86_BUILTIN_ROUNDPS256,
20051 IX86_BUILTIN_UNPCKHPD256,
20052 IX86_BUILTIN_UNPCKLPD256,
20053 IX86_BUILTIN_UNPCKHPS256,
20054 IX86_BUILTIN_UNPCKLPS256,
20056 IX86_BUILTIN_SI256_SI,
20057 IX86_BUILTIN_PS256_PS,
20058 IX86_BUILTIN_PD256_PD,
20059 IX86_BUILTIN_SI_SI256,
20060 IX86_BUILTIN_PS_PS256,
20061 IX86_BUILTIN_PD_PD256,
20063 IX86_BUILTIN_VTESTZPD,
20064 IX86_BUILTIN_VTESTCPD,
20065 IX86_BUILTIN_VTESTNZCPD,
20066 IX86_BUILTIN_VTESTZPS,
20067 IX86_BUILTIN_VTESTCPS,
20068 IX86_BUILTIN_VTESTNZCPS,
20069 IX86_BUILTIN_VTESTZPD256,
20070 IX86_BUILTIN_VTESTCPD256,
20071 IX86_BUILTIN_VTESTNZCPD256,
20072 IX86_BUILTIN_VTESTZPS256,
20073 IX86_BUILTIN_VTESTCPS256,
20074 IX86_BUILTIN_VTESTNZCPS256,
20075 IX86_BUILTIN_PTESTZ256,
20076 IX86_BUILTIN_PTESTC256,
20077 IX86_BUILTIN_PTESTNZC256,
20079 IX86_BUILTIN_MOVMSKPD256,
20080 IX86_BUILTIN_MOVMSKPS256,
20082 /* TFmode support builtins. */
20084 IX86_BUILTIN_FABSQ,
20085 IX86_BUILTIN_COPYSIGNQ,
20087 /* SSE5 instructions */
20088 IX86_BUILTIN_FMADDSS,
20089 IX86_BUILTIN_FMADDSD,
20090 IX86_BUILTIN_FMADDPS,
20091 IX86_BUILTIN_FMADDPD,
20092 IX86_BUILTIN_FMSUBSS,
20093 IX86_BUILTIN_FMSUBSD,
20094 IX86_BUILTIN_FMSUBPS,
20095 IX86_BUILTIN_FMSUBPD,
20096 IX86_BUILTIN_FNMADDSS,
20097 IX86_BUILTIN_FNMADDSD,
20098 IX86_BUILTIN_FNMADDPS,
20099 IX86_BUILTIN_FNMADDPD,
20100 IX86_BUILTIN_FNMSUBSS,
20101 IX86_BUILTIN_FNMSUBSD,
20102 IX86_BUILTIN_FNMSUBPS,
20103 IX86_BUILTIN_FNMSUBPD,
20104 IX86_BUILTIN_PCMOV,
20105 IX86_BUILTIN_PCMOV_V2DI,
20106 IX86_BUILTIN_PCMOV_V4SI,
20107 IX86_BUILTIN_PCMOV_V8HI,
20108 IX86_BUILTIN_PCMOV_V16QI,
20109 IX86_BUILTIN_PCMOV_V4SF,
20110 IX86_BUILTIN_PCMOV_V2DF,
20111 IX86_BUILTIN_PPERM,
20112 IX86_BUILTIN_PERMPS,
20113 IX86_BUILTIN_PERMPD,
20114 IX86_BUILTIN_PMACSSWW,
20115 IX86_BUILTIN_PMACSWW,
20116 IX86_BUILTIN_PMACSSWD,
20117 IX86_BUILTIN_PMACSWD,
20118 IX86_BUILTIN_PMACSSDD,
20119 IX86_BUILTIN_PMACSDD,
20120 IX86_BUILTIN_PMACSSDQL,
20121 IX86_BUILTIN_PMACSSDQH,
20122 IX86_BUILTIN_PMACSDQL,
20123 IX86_BUILTIN_PMACSDQH,
20124 IX86_BUILTIN_PMADCSSWD,
20125 IX86_BUILTIN_PMADCSWD,
20126 IX86_BUILTIN_PHADDBW,
20127 IX86_BUILTIN_PHADDBD,
20128 IX86_BUILTIN_PHADDBQ,
20129 IX86_BUILTIN_PHADDWD,
20130 IX86_BUILTIN_PHADDWQ,
20131 IX86_BUILTIN_PHADDDQ,
20132 IX86_BUILTIN_PHADDUBW,
20133 IX86_BUILTIN_PHADDUBD,
20134 IX86_BUILTIN_PHADDUBQ,
20135 IX86_BUILTIN_PHADDUWD,
20136 IX86_BUILTIN_PHADDUWQ,
20137 IX86_BUILTIN_PHADDUDQ,
20138 IX86_BUILTIN_PHSUBBW,
20139 IX86_BUILTIN_PHSUBWD,
20140 IX86_BUILTIN_PHSUBDQ,
20141 IX86_BUILTIN_PROTB,
20142 IX86_BUILTIN_PROTW,
20143 IX86_BUILTIN_PROTD,
20144 IX86_BUILTIN_PROTQ,
20145 IX86_BUILTIN_PROTB_IMM,
20146 IX86_BUILTIN_PROTW_IMM,
20147 IX86_BUILTIN_PROTD_IMM,
20148 IX86_BUILTIN_PROTQ_IMM,
20149 IX86_BUILTIN_PSHLB,
20150 IX86_BUILTIN_PSHLW,
20151 IX86_BUILTIN_PSHLD,
20152 IX86_BUILTIN_PSHLQ,
20153 IX86_BUILTIN_PSHAB,
20154 IX86_BUILTIN_PSHAW,
20155 IX86_BUILTIN_PSHAD,
20156 IX86_BUILTIN_PSHAQ,
20157 IX86_BUILTIN_FRCZSS,
20158 IX86_BUILTIN_FRCZSD,
20159 IX86_BUILTIN_FRCZPS,
20160 IX86_BUILTIN_FRCZPD,
20161 IX86_BUILTIN_CVTPH2PS,
20162 IX86_BUILTIN_CVTPS2PH,
20164 IX86_BUILTIN_COMEQSS,
20165 IX86_BUILTIN_COMNESS,
20166 IX86_BUILTIN_COMLTSS,
20167 IX86_BUILTIN_COMLESS,
20168 IX86_BUILTIN_COMGTSS,
20169 IX86_BUILTIN_COMGESS,
20170 IX86_BUILTIN_COMUEQSS,
20171 IX86_BUILTIN_COMUNESS,
20172 IX86_BUILTIN_COMULTSS,
20173 IX86_BUILTIN_COMULESS,
20174 IX86_BUILTIN_COMUGTSS,
20175 IX86_BUILTIN_COMUGESS,
20176 IX86_BUILTIN_COMORDSS,
20177 IX86_BUILTIN_COMUNORDSS,
20178 IX86_BUILTIN_COMFALSESS,
20179 IX86_BUILTIN_COMTRUESS,
20181 IX86_BUILTIN_COMEQSD,
20182 IX86_BUILTIN_COMNESD,
20183 IX86_BUILTIN_COMLTSD,
20184 IX86_BUILTIN_COMLESD,
20185 IX86_BUILTIN_COMGTSD,
20186 IX86_BUILTIN_COMGESD,
20187 IX86_BUILTIN_COMUEQSD,
20188 IX86_BUILTIN_COMUNESD,
20189 IX86_BUILTIN_COMULTSD,
20190 IX86_BUILTIN_COMULESD,
20191 IX86_BUILTIN_COMUGTSD,
20192 IX86_BUILTIN_COMUGESD,
20193 IX86_BUILTIN_COMORDSD,
20194 IX86_BUILTIN_COMUNORDSD,
20195 IX86_BUILTIN_COMFALSESD,
20196 IX86_BUILTIN_COMTRUESD,
20198 IX86_BUILTIN_COMEQPS,
20199 IX86_BUILTIN_COMNEPS,
20200 IX86_BUILTIN_COMLTPS,
20201 IX86_BUILTIN_COMLEPS,
20202 IX86_BUILTIN_COMGTPS,
20203 IX86_BUILTIN_COMGEPS,
20204 IX86_BUILTIN_COMUEQPS,
20205 IX86_BUILTIN_COMUNEPS,
20206 IX86_BUILTIN_COMULTPS,
20207 IX86_BUILTIN_COMULEPS,
20208 IX86_BUILTIN_COMUGTPS,
20209 IX86_BUILTIN_COMUGEPS,
20210 IX86_BUILTIN_COMORDPS,
20211 IX86_BUILTIN_COMUNORDPS,
20212 IX86_BUILTIN_COMFALSEPS,
20213 IX86_BUILTIN_COMTRUEPS,
20215 IX86_BUILTIN_COMEQPD,
20216 IX86_BUILTIN_COMNEPD,
20217 IX86_BUILTIN_COMLTPD,
20218 IX86_BUILTIN_COMLEPD,
20219 IX86_BUILTIN_COMGTPD,
20220 IX86_BUILTIN_COMGEPD,
20221 IX86_BUILTIN_COMUEQPD,
20222 IX86_BUILTIN_COMUNEPD,
20223 IX86_BUILTIN_COMULTPD,
20224 IX86_BUILTIN_COMULEPD,
20225 IX86_BUILTIN_COMUGTPD,
20226 IX86_BUILTIN_COMUGEPD,
20227 IX86_BUILTIN_COMORDPD,
20228 IX86_BUILTIN_COMUNORDPD,
20229 IX86_BUILTIN_COMFALSEPD,
20230 IX86_BUILTIN_COMTRUEPD,
20232 IX86_BUILTIN_PCOMEQUB,
20233 IX86_BUILTIN_PCOMNEUB,
20234 IX86_BUILTIN_PCOMLTUB,
20235 IX86_BUILTIN_PCOMLEUB,
20236 IX86_BUILTIN_PCOMGTUB,
20237 IX86_BUILTIN_PCOMGEUB,
20238 IX86_BUILTIN_PCOMFALSEUB,
20239 IX86_BUILTIN_PCOMTRUEUB,
20240 IX86_BUILTIN_PCOMEQUW,
20241 IX86_BUILTIN_PCOMNEUW,
20242 IX86_BUILTIN_PCOMLTUW,
20243 IX86_BUILTIN_PCOMLEUW,
20244 IX86_BUILTIN_PCOMGTUW,
20245 IX86_BUILTIN_PCOMGEUW,
20246 IX86_BUILTIN_PCOMFALSEUW,
20247 IX86_BUILTIN_PCOMTRUEUW,
20248 IX86_BUILTIN_PCOMEQUD,
20249 IX86_BUILTIN_PCOMNEUD,
20250 IX86_BUILTIN_PCOMLTUD,
20251 IX86_BUILTIN_PCOMLEUD,
20252 IX86_BUILTIN_PCOMGTUD,
20253 IX86_BUILTIN_PCOMGEUD,
20254 IX86_BUILTIN_PCOMFALSEUD,
20255 IX86_BUILTIN_PCOMTRUEUD,
20256 IX86_BUILTIN_PCOMEQUQ,
20257 IX86_BUILTIN_PCOMNEUQ,
20258 IX86_BUILTIN_PCOMLTUQ,
20259 IX86_BUILTIN_PCOMLEUQ,
20260 IX86_BUILTIN_PCOMGTUQ,
20261 IX86_BUILTIN_PCOMGEUQ,
20262 IX86_BUILTIN_PCOMFALSEUQ,
20263 IX86_BUILTIN_PCOMTRUEUQ,
20265 IX86_BUILTIN_PCOMEQB,
20266 IX86_BUILTIN_PCOMNEB,
20267 IX86_BUILTIN_PCOMLTB,
20268 IX86_BUILTIN_PCOMLEB,
20269 IX86_BUILTIN_PCOMGTB,
20270 IX86_BUILTIN_PCOMGEB,
20271 IX86_BUILTIN_PCOMFALSEB,
20272 IX86_BUILTIN_PCOMTRUEB,
20273 IX86_BUILTIN_PCOMEQW,
20274 IX86_BUILTIN_PCOMNEW,
20275 IX86_BUILTIN_PCOMLTW,
20276 IX86_BUILTIN_PCOMLEW,
20277 IX86_BUILTIN_PCOMGTW,
20278 IX86_BUILTIN_PCOMGEW,
20279 IX86_BUILTIN_PCOMFALSEW,
20280 IX86_BUILTIN_PCOMTRUEW,
20281 IX86_BUILTIN_PCOMEQD,
20282 IX86_BUILTIN_PCOMNED,
20283 IX86_BUILTIN_PCOMLTD,
20284 IX86_BUILTIN_PCOMLED,
20285 IX86_BUILTIN_PCOMGTD,
20286 IX86_BUILTIN_PCOMGED,
20287 IX86_BUILTIN_PCOMFALSED,
20288 IX86_BUILTIN_PCOMTRUED,
20289 IX86_BUILTIN_PCOMEQQ,
20290 IX86_BUILTIN_PCOMNEQ,
20291 IX86_BUILTIN_PCOMLTQ,
20292 IX86_BUILTIN_PCOMLEQ,
20293 IX86_BUILTIN_PCOMGTQ,
20294 IX86_BUILTIN_PCOMGEQ,
20295 IX86_BUILTIN_PCOMFALSEQ,
20296 IX86_BUILTIN_PCOMTRUEQ,
20301 /* Table for the ix86 builtin decls. */
20302 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20304 /* Table of all of the builtin functions that are possible with different ISA's
20305 but are waiting to be built until a function is declared to use that
/* Each entry records what is needed to declare the builtin lazily via
   add_builtin_function: its tree type, its name, and the isa_flags mask
   it requires.  Indexed by the IX86_BUILTIN_* code, like ix86_builtins.  */
20307 struct builtin_isa GTY(())
20309 tree type; /* builtin type to use in the declaration */
20310 const char *name; /* function name */
20311 int isa; /* isa_flags this builtin is defined for */
20312 bool const_p; /* true if the declaration is constant */
/* Deferred-declaration table; entries with a non-NULL type are builtins
   that have been recorded but not yet declared.  */
20315 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20318 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
20319 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
20320 * function decl in the ix86_builtins array. Returns the function decl or
20321 * NULL_TREE, if the builtin was not added.
20323 * If the front end has a special hook for builtin functions, delay adding
20324 * builtin functions that aren't in the current ISA until the ISA is changed
20325 * with function specific optimization. Doing so, can save about 300K for the
20326 * default compiler. When the builtin is expanded, check at that time whether
20329 * If the front end doesn't have a special hook, record all builtins, even if
20330 * it isn't an instruction set in the current ISA in case the user uses
20331 * function specific options for a different ISA, so that we don't get scope
20332 * errors if a builtin is added in the middle of a function scope. */
20335 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
/* Stays NULL_TREE when the builtin is 64-bit-only on a 32-bit target, or
   when its declaration is deferred to ix86_add_new_builtins.  */
20337 tree decl = NULL_TREE;
/* 64-bit-only builtins are processed only when TARGET_64BIT.  */
20339 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20341 ix86_builtins_isa[(int) code].isa = mask;
/* Declare immediately if the required ISA is already enabled, or if the
   front end installs builtins in the extended (outermost) scope anyway.  */
20343 if ((mask & ix86_isa_flags) != 0
20344 || (lang_hooks.builtin_function
20345 == lang_hooks.builtin_function_ext_scope))
20348 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20350 ix86_builtins[(int) code] = decl;
/* A NULL type marks this slot as already declared.  */
20351 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Otherwise record everything needed so ix86_add_new_builtins can create
   the decl later, when the ISA becomes available.  */
20355 ix86_builtins[(int) code] = NULL_TREE;
20356 ix86_builtins_isa[(int) code].const_p = false;
20357 ix86_builtins_isa[(int) code].type = type;
20358 ix86_builtins_isa[(int) code].name = name;
20365 /* Like def_builtin, but also marks the function decl "const". */
20368 def_builtin_const (int mask, const char *name, tree type,
20369 enum ix86_builtins code)
20371 tree decl = def_builtin (mask, name, type, code);
/* If the decl exists, mark it const right away; otherwise remember the
   constness in the deferred table so ix86_add_new_builtins applies it
   when the decl is eventually created.  */
20373 TREE_READONLY (decl) = 1;
20375 ix86_builtins_isa[(int) code].const_p = true;
20380 /* Add any new builtin functions for a given ISA that may not have been
20381 declared. This saves a bit of space compared to adding all of the
20382 declarations to the tree, even if we didn't use them. */
20385 ix86_add_new_builtins (int isa)
/* Walk the deferred table and declare every builtin that ISA enables and
   whose decl has not been created yet (type still non-NULL).  */
20390 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20392 if ((ix86_builtins_isa[i].isa & isa) != 0
20393 && ix86_builtins_isa[i].type != NULL_TREE)
20395 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20396 ix86_builtins_isa[i].type,
20397 i, BUILT_IN_MD, NULL,
20400 ix86_builtins[i] = decl;
/* Clear the type so the builtin is not declared a second time.  */
20401 ix86_builtins_isa[i].type = NULL_TREE;
20402 if (ix86_builtins_isa[i].const_p)
20403 TREE_READONLY (decl) = 1;
20408 /* Bits for builtin_description.flag. */
20410 /* Set when we don't support the comparison natively, and should
20411 swap_comparison in order to support it. */
20412 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the builtin tables below: the ISA mask required, the insn
   pattern implementing the builtin, its user-visible "__builtin_ia32_*"
   name, its IX86_BUILTIN_* code, and the rtx comparison code (UNKNOWN
   when not a comparison).  */
20414 struct builtin_description
20416 const unsigned int mask;
20417 const enum insn_code icode;
20418 const char *const name;
20419 const enum ix86_builtins code;
20420 const enum rtx_code comparison;
/* Scalar FP comi/ucomi comparison builtins.  The rtx code in each row is
   the condition tested on the flags after the compare.  */
20424 static const struct builtin_description bdesc_comi[] =
/* SSE: single-precision (SS) variants.  */
20426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
/* SSE2: double-precision (SD) variants.  */
20438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
20440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
20441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
20442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
20443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
20444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
20445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
20446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
20447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
20448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
20449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 explicit-length string-compare builtins.  For the flag-reading
   variants the last field carries the CC mode of the flag to extract.  */
20452 static const struct builtin_description bdesc_pcmpestr[] =
20455 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20456 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20457 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20458 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20459 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20460 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20461 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 implicit-length string-compare builtins; same layout as
   bdesc_pcmpestr above.  */
20464 static const struct builtin_description bdesc_pcmpistr[] =
20467 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20468 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20469 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20470 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20471 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20472 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20473 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
20476 /* Special builtin types */
/* Function-type codes for the "special" builtins (loads/stores and the
   like).  Naming: RET_FTYPE_ARGS, where Vn* are vector types, P* is a
   pointer argument and PC* a pointer-to-const argument.  */
20477 enum ix86_special_builtin_type
20479 SPECIAL_FTYPE_UNKNOWN,
/* Loads: take pointer-to-const, return a vector.  */
20481 V32QI_FTYPE_PCCHAR,
20482 V16QI_FTYPE_PCCHAR,
20484 V8SF_FTYPE_PCFLOAT,
20486 V4DF_FTYPE_PCDOUBLE,
20487 V4SF_FTYPE_PCFLOAT,
20488 V2DF_FTYPE_PCDOUBLE,
20489 V8SF_FTYPE_PCV8SF_V8SF,
20490 V4DF_FTYPE_PCV4DF_V4DF,
20491 V4SF_FTYPE_V4SF_PCV2SF,
20492 V4SF_FTYPE_PCV4SF_V4SF,
20493 V2DF_FTYPE_V2DF_PCDOUBLE,
20494 V2DF_FTYPE_PCV2DF_V2DF,
/* Stores: take a destination pointer plus the value(s), return void.  */
20496 VOID_FTYPE_PV2SF_V4SF,
20497 VOID_FTYPE_PV4DI_V4DI,
20498 VOID_FTYPE_PV2DI_V2DI,
20499 VOID_FTYPE_PCHAR_V32QI,
20500 VOID_FTYPE_PCHAR_V16QI,
20501 VOID_FTYPE_PFLOAT_V8SF,
20502 VOID_FTYPE_PFLOAT_V4SF,
20503 VOID_FTYPE_PDOUBLE_V4DF,
20504 VOID_FTYPE_PDOUBLE_V2DF,
20506 VOID_FTYPE_PINT_INT,
20507 VOID_FTYPE_PV8SF_V8SF_V8SF,
20508 VOID_FTYPE_PV4DF_V4DF_V4DF,
20509 VOID_FTYPE_PV4SF_V4SF_V4SF,
20510 VOID_FTYPE_PV2DF_V2DF_V2DF
20513 /* Builtin types */
/* Function-type codes for the general builtin tables.  Naming follows
   RET_FTYPE_ARG1[_ARG2...].  NOTE(review): suffixes such as _COUNT,
   _SWAP, _VEC_MERGE and _PTEST presumably select special handling in the
   expanders elsewhere in this file — confirm against the expansion code.  */
20514 enum ix86_builtin_type
20517 FLOAT128_FTYPE_FLOAT128,
20519 FLOAT128_FTYPE_FLOAT128_FLOAT128,
20520 INT_FTYPE_V8SF_V8SF_PTEST,
20521 INT_FTYPE_V4DI_V4DI_PTEST,
20522 INT_FTYPE_V4DF_V4DF_PTEST,
20523 INT_FTYPE_V4SF_V4SF_PTEST,
20524 INT_FTYPE_V2DI_V2DI_PTEST,
20525 INT_FTYPE_V2DF_V2DF_PTEST,
20557 V4SF_FTYPE_V4SF_VEC_MERGE,
20566 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand (vector, vector) forms.  */
20577 V16QI_FTYPE_V16QI_V16QI,
20578 V16QI_FTYPE_V8HI_V8HI,
20579 V8QI_FTYPE_V8QI_V8QI,
20580 V8QI_FTYPE_V4HI_V4HI,
20581 V8HI_FTYPE_V8HI_V8HI,
20582 V8HI_FTYPE_V8HI_V8HI_COUNT,
20583 V8HI_FTYPE_V16QI_V16QI,
20584 V8HI_FTYPE_V4SI_V4SI,
20585 V8HI_FTYPE_V8HI_SI_COUNT,
20586 V8SF_FTYPE_V8SF_V8SF,
20587 V8SF_FTYPE_V8SF_V8SI,
20588 V4SI_FTYPE_V4SI_V4SI,
20589 V4SI_FTYPE_V4SI_V4SI_COUNT,
20590 V4SI_FTYPE_V8HI_V8HI,
20591 V4SI_FTYPE_V4SF_V4SF,
20592 V4SI_FTYPE_V2DF_V2DF,
20593 V4SI_FTYPE_V4SI_SI_COUNT,
20594 V4HI_FTYPE_V4HI_V4HI,
20595 V4HI_FTYPE_V4HI_V4HI_COUNT,
20596 V4HI_FTYPE_V8QI_V8QI,
20597 V4HI_FTYPE_V2SI_V2SI,
20598 V4HI_FTYPE_V4HI_SI_COUNT,
20599 V4DF_FTYPE_V4DF_V4DF,
20600 V4DF_FTYPE_V4DF_V4DI,
20601 V4SF_FTYPE_V4SF_V4SF,
20602 V4SF_FTYPE_V4SF_V4SF_SWAP,
20603 V4SF_FTYPE_V4SF_V4SI,
20604 V4SF_FTYPE_V4SF_V2SI,
20605 V4SF_FTYPE_V4SF_V2DF,
20606 V4SF_FTYPE_V4SF_DI,
20607 V4SF_FTYPE_V4SF_SI,
20608 V2DI_FTYPE_V2DI_V2DI,
20609 V2DI_FTYPE_V2DI_V2DI_COUNT,
20610 V2DI_FTYPE_V16QI_V16QI,
20611 V2DI_FTYPE_V4SI_V4SI,
20612 V2DI_FTYPE_V2DI_V16QI,
20613 V2DI_FTYPE_V2DF_V2DF,
20614 V2DI_FTYPE_V2DI_SI_COUNT,
20615 V2SI_FTYPE_V2SI_V2SI,
20616 V2SI_FTYPE_V2SI_V2SI_COUNT,
20617 V2SI_FTYPE_V4HI_V4HI,
20618 V2SI_FTYPE_V2SF_V2SF,
20619 V2SI_FTYPE_V2SI_SI_COUNT,
20620 V2DF_FTYPE_V2DF_V2DF,
20621 V2DF_FTYPE_V2DF_V2DF_SWAP,
20622 V2DF_FTYPE_V2DF_V4SF,
20623 V2DF_FTYPE_V2DF_V2DI,
20624 V2DF_FTYPE_V2DF_DI,
20625 V2DF_FTYPE_V2DF_SI,
20626 V2SF_FTYPE_V2SF_V2SF,
20627 V1DI_FTYPE_V1DI_V1DI,
20628 V1DI_FTYPE_V1DI_V1DI_COUNT,
20629 V1DI_FTYPE_V8QI_V8QI,
20630 V1DI_FTYPE_V2SI_V2SI,
20631 V1DI_FTYPE_V1DI_SI_COUNT,
20632 UINT64_FTYPE_UINT64_UINT64,
20633 UINT_FTYPE_UINT_UINT,
20634 UINT_FTYPE_UINT_USHORT,
20635 UINT_FTYPE_UINT_UCHAR,
/* (vector, immediate-int) forms.  */
20636 V8HI_FTYPE_V8HI_INT,
20637 V4SI_FTYPE_V4SI_INT,
20638 V4HI_FTYPE_V4HI_INT,
20639 V8SF_FTYPE_V8SF_INT,
20640 V4SI_FTYPE_V8SI_INT,
20641 V4SF_FTYPE_V8SF_INT,
20642 V2DF_FTYPE_V4DF_INT,
20643 V4DF_FTYPE_V4DF_INT,
20644 V4SF_FTYPE_V4SF_INT,
20645 V2DI_FTYPE_V2DI_INT,
20646 V2DI2TI_FTYPE_V2DI_INT,
20647 V2DF_FTYPE_V2DF_INT,
/* Three-operand forms.  */
20648 V16QI_FTYPE_V16QI_V16QI_V16QI,
20649 V8SF_FTYPE_V8SF_V8SF_V8SF,
20650 V4DF_FTYPE_V4DF_V4DF_V4DF,
20651 V4SF_FTYPE_V4SF_V4SF_V4SF,
20652 V2DF_FTYPE_V2DF_V2DF_V2DF,
20653 V16QI_FTYPE_V16QI_V16QI_INT,
20654 V8SI_FTYPE_V8SI_V8SI_INT,
20655 V8SI_FTYPE_V8SI_V4SI_INT,
20656 V8HI_FTYPE_V8HI_V8HI_INT,
20657 V8SF_FTYPE_V8SF_V8SF_INT,
20658 V8SF_FTYPE_V8SF_V4SF_INT,
20659 V4SI_FTYPE_V4SI_V4SI_INT,
20660 V4DF_FTYPE_V4DF_V4DF_INT,
20661 V4DF_FTYPE_V4DF_V2DF_INT,
20662 V4SF_FTYPE_V4SF_V4SF_INT,
20663 V2DI_FTYPE_V2DI_V2DI_INT,
20664 V2DI2TI_FTYPE_V2DI_V2DI_INT,
20665 V1DI2DI_FTYPE_V1DI_V1DI_INT,
20666 V2DF_FTYPE_V2DF_V2DF_INT,
20667 V2DI_FTYPE_V2DI_UINT_UINT,
20668 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
20671 /* Special builtins with variable number of arguments. */
/* Each row: ISA mask, insn pattern, builtin name (0 when the builtin is
   registered under a different name elsewhere), builtin code, comparison
   (UNKNOWN for these) and the ix86_special_builtin_type code.  */
20672 static const struct builtin_description bdesc_special_args[] =
/* MMX */
20675 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
20678 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
20681 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20682 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20683 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20685 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20686 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20687 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20688 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20690 /* SSE or 3DNow!A */
20691 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20692 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
20695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
20709 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
20712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
20715 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20716 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
20719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20721 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
20738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
20742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
20743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
20744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
20745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
20746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
20747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
20748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
20751 /* Builtins with variable number of arguments. */
20752 static const struct builtin_description bdesc_args[] =
20755 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20756 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20757 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20758 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20759 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20760 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20762 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20763 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20764 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20765 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20766 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20767 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20768 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20769 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20771 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20772 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20774 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20775 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20776 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20777 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20780 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20781 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20782 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20783 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20784 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20786 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20787 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20788 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20789 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20790 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
20791 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
20793 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20794 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
20795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20797 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
20799 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20800 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20801 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20802 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20806 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20807 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20808 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20809 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20810 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20811 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20813 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20814 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20815 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20816 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20819 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20820 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20821 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20822 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20824 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20825 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20826 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20827 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20828 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20829 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20830 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20831 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20832 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20833 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20834 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20835 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20836 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20837 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20838 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20841 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20842 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20843 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20844 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20845 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20846 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20849 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
20850 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20851 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20853 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20857 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20859 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20860 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20862 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20864 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20865 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20866 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20881 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
20884 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20885 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20886 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20887 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20890 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20896 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20897 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20901 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20903 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20904 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
20913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
20914 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
20916 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
20918 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20919 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20920 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20922 /* MMX extensions introduced with SSE ("SSE MMX"), also available via 3DNow!A
20922    -- each entry below is gated on OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A.  */
20923 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20924 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20925 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20927 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20928 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20929 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20930 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20932 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
20933 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
20935 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
20938 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
20941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
20942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
20943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
20944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
20946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
20949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
20954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20956 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20957 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
20961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20963 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20964 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20965 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20966 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
20977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20987 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20994 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20998 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21000 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21001 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21007 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21009 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21010 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21011 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21012 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21013 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21014 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21015 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21016 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21027 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21028 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21030 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21032 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21033 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21045 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21046 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21047 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21063 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21072 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21077 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21078 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21079 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21080 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21081 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21082 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21085 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21086 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21087 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21088 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21089 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21090 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21092 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21093 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21094 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21095 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21103 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21104 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21109 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21110 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21113 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21114 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21116 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21117 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21118 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21119 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21120 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21121 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21124 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21125 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21126 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21127 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21128 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21129 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21131 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21132 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21133 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21134 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21135 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21136 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21137 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21138 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21139 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21140 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21141 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21142 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21143 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21144 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21145 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21146 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21147 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21148 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21149 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21150 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21151 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21152 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21153 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21154 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21157 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21158 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21161 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21162 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21163 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21164 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21165 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21166 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21167 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21168 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21169 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21170 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21172 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21173 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21174 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21175 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21176 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21177 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21178 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21179 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21180 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21181 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21182 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21183 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21184 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21186 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21187 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21188 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21189 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21190 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21191 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21192 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21193 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21194 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21195 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21196 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21197 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21199 /* SSE4.1 and SSE5 */
21200 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21201 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21202 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21203 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21205 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21206 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21207 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21210 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21211 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21212 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21213 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21214 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21217 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21218 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21219 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21220 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21223 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21224 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21226 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21227 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21228 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21229 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21232 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21235 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21236 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21237 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21238 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21239 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21240 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21241 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21242 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21243 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21244 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21245 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21246 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21247 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21248 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21249 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21250 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21251 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21252 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21253 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21254 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21255 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21256 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21257 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21258 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21259 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21260 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21264 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21267 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21268 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21271 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21272 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21273 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21274 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21275 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21276 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21277 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21278 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21279 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21280 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21281 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21282 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21283 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21284 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21285 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21286 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21287 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21288 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21289 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21290 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21291 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21292 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21293 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21294 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21295 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21296 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21297 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21298 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21299 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21300 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21302 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21303 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21304 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21306 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21307 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21308 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21309 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21310 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21312 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  /* AVX round of 256-bit packed FP; the INT operand is the rounding-mode
     immediate.  */
21314 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  /* AVX 256-bit interleave (unpack high/low) of packed double/float.  */
21317 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21318 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  /* Widening casts 128 -> 256 (upper half undefined) and narrowing casts
     256 -> 128 (take low half), for SI/PS/PD element kinds.  */
21322 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21323 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21327 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  /* VTESTPS/VTESTPD and VPTEST: one insn pattern per mode, with the
     comparison code selecting which flag is read (EQ -> ZF "testz",
     LTU -> CF "testc", GTU -> "testnzc").  */
21329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21334 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21340 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21341 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  /* Extract the sign-bit mask of each 256-bit FP element into an int.  */
21345 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
/* Classifies each entry of bdesc_multi_arg below: operand count, element
   mode(s), and the flavor of the last operand (IMM = immediate count,
   CMP = comparison producing an all-ones/all-zeros mask).  */
21350 enum multi_arg_type {
  /* Three-operand permutes (PERMPS/PERMPD).  */
21360   MULTI_ARG_3_PERMPS,
21361   MULTI_ARG_3_PERMPD,
  /* Two operands where the second is an immediate (rotate counts).  */
21368   MULTI_ARG_2_DI_IMM,
21369   MULTI_ARG_2_SI_IMM,
21370   MULTI_ARG_2_HI_IMM,
21371   MULTI_ARG_2_QI_IMM,
  /* Two-operand comparisons, per element mode.  */
21372   MULTI_ARG_2_SF_CMP,
21373   MULTI_ARG_2_DF_CMP,
21374   MULTI_ARG_2_DI_CMP,
21375   MULTI_ARG_2_SI_CMP,
21376   MULTI_ARG_2_HI_CMP,
21377   MULTI_ARG_2_QI_CMP,
/* SSE5 builtins taking more than two operands, or whose comparison/test
   code is carried in the descriptor.  Each row: ISA mask, insn pattern,
   user-visible builtin name, builtin enum, rtx comparison code (0 if
   unused), and a multi_arg_type tag telling the expander the operand
   shape.  */
21400 static const struct builtin_description bdesc_multi_arg[] =
  /* Fused multiply-add/subtract, scalar ("vm" patterns) and packed.  */
21402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
  /* PCMOV: bitwise select; the generic "pcmov" shares the V2DI pattern.  */
21418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
21423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
  /* Byte permute and FP permutes.  */
21425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
21427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
  /* Integer multiply-accumulate ("ss" variants saturate).  */
21428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
21429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
21430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
21432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
21433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
21434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
  /* Rotates: "v" patterns take per-element counts, plain patterns take an
     immediate (the _IMM types).  */
21440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
21441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
21442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
21443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
21444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
21445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
21446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
21447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
  /* Per-element variable shifts: arithmetic (psha*) and logical (pshl*).  */
21448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
21449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
21450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
21451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
21452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
21453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
21454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
21455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
  /* Fraction extract, half-precision converts, horizontal add/sub.  */
21456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
21457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
21458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
21459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
21460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
21461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
21462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
21463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
21464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
21465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
21466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
21467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
21468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
21469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
21470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
21471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
21472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
21473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
21474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
21475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
21476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
  /* Scalar SF comparisons.  Note: the "*neq*" spellings are deliberate
     aliases that reuse the corresponding "*ne*" builtin enum value.  */
21478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
21479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
21482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
21483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
21484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
21485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
  /* Scalar DF comparisons (same alias scheme).  */
21495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21496 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21497 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21498 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21499 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21502 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
  /* Packed SF comparisons.  */
21512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21517 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21523 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21524 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21525 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21526 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21527 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
  /* Packed DF comparisons.  */
21529 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21530 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21531 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21532 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21533 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21534 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21535 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21536 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21537 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21538 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21539 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21540 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21541 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21542 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21543 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21544 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
  /* PCOM: signed integer comparisons, per element width.  */
21546 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21547 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21548 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21549 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21550 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21551 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21552 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21554 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21555 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21556 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21557 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21558 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21559 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21560 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21562 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21563 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21564 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21565 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21566 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21567 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21568 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21570 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21571 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21572 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21573 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21574 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21575 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21576 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
  /* PCOM: unsigned comparisons; EQ/NE use the "uns2" patterns, the ordered
     ones use the "uns" patterns with LTU/LEU/GTU/GEU codes.  */
21578 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21579 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21580 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21581 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21582 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21583 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21584 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21586 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21587 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21588 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21589 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21590 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21591 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21592 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21594 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21595 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21596 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21597 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21598 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21599 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21600 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21602 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21603 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21604 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21605 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21606 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21607 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21608 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
  /* Degenerate FP comparisons: always-false / always-true results.  */
21610 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21611 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21612 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21613 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21614 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21615 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21616 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21617 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
  /* Degenerate integer comparisons; signed and unsigned names share the
     same pattern since the result does not depend on the operands.  */
21619 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21620 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21621 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21622 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21623 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21624 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21625 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21626 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21628 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21629 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21630 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21631 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21632 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21633 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21634 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21635 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21638 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
21639 in the current target ISA to allow the user to compile particular modules
21640 with different target specific options that differ from the command line
21643 ix86_init_mmx_sse_builtins (void)
21645 const struct builtin_description * d;
21648 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21649 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21650 tree V1DI_type_node
21651 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21652 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21653 tree V2DI_type_node
21654 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21655 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21656 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21657 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21658 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21659 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21660 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
21662 tree pchar_type_node = build_pointer_type (char_type_node);
21663 tree pcchar_type_node
21664 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21665 tree pfloat_type_node = build_pointer_type (float_type_node);
21666 tree pcfloat_type_node
21667 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21668 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21669 tree pcv2sf_type_node
21670 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21671 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21672 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
21675 tree int_ftype_v4sf_v4sf
21676 = build_function_type_list (integer_type_node,
21677 V4SF_type_node, V4SF_type_node, NULL_TREE);
21678 tree v4si_ftype_v4sf_v4sf
21679 = build_function_type_list (V4SI_type_node,
21680 V4SF_type_node, V4SF_type_node, NULL_TREE);
21681 /* MMX/SSE/integer conversions. */
21682 tree int_ftype_v4sf
21683 = build_function_type_list (integer_type_node,
21684 V4SF_type_node, NULL_TREE);
21685 tree int64_ftype_v4sf
21686 = build_function_type_list (long_long_integer_type_node,
21687 V4SF_type_node, NULL_TREE);
21688 tree int_ftype_v8qi
21689 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21690 tree v4sf_ftype_v4sf_int
21691 = build_function_type_list (V4SF_type_node,
21692 V4SF_type_node, integer_type_node, NULL_TREE);
21693 tree v4sf_ftype_v4sf_int64
21694 = build_function_type_list (V4SF_type_node,
21695 V4SF_type_node, long_long_integer_type_node,
21697 tree v4sf_ftype_v4sf_v2si
21698 = build_function_type_list (V4SF_type_node,
21699 V4SF_type_node, V2SI_type_node, NULL_TREE);
21701 /* Miscellaneous. */
21702 tree v8qi_ftype_v4hi_v4hi
21703 = build_function_type_list (V8QI_type_node,
21704 V4HI_type_node, V4HI_type_node, NULL_TREE);
21705 tree v4hi_ftype_v2si_v2si
21706 = build_function_type_list (V4HI_type_node,
21707 V2SI_type_node, V2SI_type_node, NULL_TREE);
21708 tree v4sf_ftype_v4sf_v4sf_int
21709 = build_function_type_list (V4SF_type_node,
21710 V4SF_type_node, V4SF_type_node,
21711 integer_type_node, NULL_TREE);
21712 tree v2si_ftype_v4hi_v4hi
21713 = build_function_type_list (V2SI_type_node,
21714 V4HI_type_node, V4HI_type_node, NULL_TREE);
21715 tree v4hi_ftype_v4hi_int
21716 = build_function_type_list (V4HI_type_node,
21717 V4HI_type_node, integer_type_node, NULL_TREE);
21718 tree v2si_ftype_v2si_int
21719 = build_function_type_list (V2SI_type_node,
21720 V2SI_type_node, integer_type_node, NULL_TREE);
21721 tree v1di_ftype_v1di_int
21722 = build_function_type_list (V1DI_type_node,
21723 V1DI_type_node, integer_type_node, NULL_TREE);
21725 tree void_ftype_void
21726 = build_function_type (void_type_node, void_list_node);
21727 tree void_ftype_unsigned
21728 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21729 tree void_ftype_unsigned_unsigned
21730 = build_function_type_list (void_type_node, unsigned_type_node,
21731 unsigned_type_node, NULL_TREE);
21732 tree void_ftype_pcvoid_unsigned_unsigned
21733 = build_function_type_list (void_type_node, const_ptr_type_node,
21734 unsigned_type_node, unsigned_type_node,
21736 tree unsigned_ftype_void
21737 = build_function_type (unsigned_type_node, void_list_node);
21738 tree v2si_ftype_v4sf
21739 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
21740 /* Loads/stores. */
21741 tree void_ftype_v8qi_v8qi_pchar
21742 = build_function_type_list (void_type_node,
21743 V8QI_type_node, V8QI_type_node,
21744 pchar_type_node, NULL_TREE);
21745 tree v4sf_ftype_pcfloat
21746 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
21747 tree v4sf_ftype_v4sf_pcv2sf
21748 = build_function_type_list (V4SF_type_node,
21749 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
21750 tree void_ftype_pv2sf_v4sf
21751 = build_function_type_list (void_type_node,
21752 pv2sf_type_node, V4SF_type_node, NULL_TREE);
21753 tree void_ftype_pfloat_v4sf
21754 = build_function_type_list (void_type_node,
21755 pfloat_type_node, V4SF_type_node, NULL_TREE);
21756 tree void_ftype_pdi_di
21757 = build_function_type_list (void_type_node,
21758 pdi_type_node, long_long_unsigned_type_node,
21760 tree void_ftype_pv2di_v2di
21761 = build_function_type_list (void_type_node,
21762 pv2di_type_node, V2DI_type_node, NULL_TREE);
21763 /* Normal vector unops. */
21764 tree v4sf_ftype_v4sf
21765 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
21766 tree v16qi_ftype_v16qi
21767 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
21768 tree v8hi_ftype_v8hi
21769 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
21770 tree v4si_ftype_v4si
21771 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
21772 tree v8qi_ftype_v8qi
21773 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
21774 tree v4hi_ftype_v4hi
21775 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
21777 /* Normal vector binops. */
21778 tree v4sf_ftype_v4sf_v4sf
21779 = build_function_type_list (V4SF_type_node,
21780 V4SF_type_node, V4SF_type_node, NULL_TREE);
21781 tree v8qi_ftype_v8qi_v8qi
21782 = build_function_type_list (V8QI_type_node,
21783 V8QI_type_node, V8QI_type_node, NULL_TREE);
21784 tree v4hi_ftype_v4hi_v4hi
21785 = build_function_type_list (V4HI_type_node,
21786 V4HI_type_node, V4HI_type_node, NULL_TREE);
21787 tree v2si_ftype_v2si_v2si
21788 = build_function_type_list (V2SI_type_node,
21789 V2SI_type_node, V2SI_type_node, NULL_TREE);
21790 tree v1di_ftype_v1di_v1di
21791 = build_function_type_list (V1DI_type_node,
21792 V1DI_type_node, V1DI_type_node, NULL_TREE);
21793 tree v1di_ftype_v1di_v1di_int
21794 = build_function_type_list (V1DI_type_node,
21795 V1DI_type_node, V1DI_type_node,
21796 integer_type_node, NULL_TREE);
21797 tree v2si_ftype_v2sf
21798 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
21799 tree v2sf_ftype_v2si
21800 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
21801 tree v2si_ftype_v2si
21802 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
21803 tree v2sf_ftype_v2sf
21804 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
21805 tree v2sf_ftype_v2sf_v2sf
21806 = build_function_type_list (V2SF_type_node,
21807 V2SF_type_node, V2SF_type_node, NULL_TREE);
21808 tree v2si_ftype_v2sf_v2sf
21809 = build_function_type_list (V2SI_type_node,
21810 V2SF_type_node, V2SF_type_node, NULL_TREE);
21811 tree pint_type_node = build_pointer_type (integer_type_node);
21812 tree pdouble_type_node = build_pointer_type (double_type_node);
21813 tree pcdouble_type_node = build_pointer_type (
21814 build_type_variant (double_type_node, 1, 0));
21815 tree int_ftype_v2df_v2df
21816 = build_function_type_list (integer_type_node,
21817 V2DF_type_node, V2DF_type_node, NULL_TREE);
21819 tree void_ftype_pcvoid
21820 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
21821 tree v4sf_ftype_v4si
21822 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
21823 tree v4si_ftype_v4sf
21824 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
21825 tree v2df_ftype_v4si
21826 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
21827 tree v4si_ftype_v2df
21828 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
21829 tree v4si_ftype_v2df_v2df
21830 = build_function_type_list (V4SI_type_node,
21831 V2DF_type_node, V2DF_type_node, NULL_TREE);
21832 tree v2si_ftype_v2df
21833 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
21834 tree v4sf_ftype_v2df
21835 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
21836 tree v2df_ftype_v2si
21837 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
21838 tree v2df_ftype_v4sf
21839 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
21840 tree int_ftype_v2df
21841 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
21842 tree int64_ftype_v2df
21843 = build_function_type_list (long_long_integer_type_node,
21844 V2DF_type_node, NULL_TREE);
21845 tree v2df_ftype_v2df_int
21846 = build_function_type_list (V2DF_type_node,
21847 V2DF_type_node, integer_type_node, NULL_TREE);
21848 tree v2df_ftype_v2df_int64
21849 = build_function_type_list (V2DF_type_node,
21850 V2DF_type_node, long_long_integer_type_node,
21852 tree v4sf_ftype_v4sf_v2df
21853 = build_function_type_list (V4SF_type_node,
21854 V4SF_type_node, V2DF_type_node, NULL_TREE);
21855 tree v2df_ftype_v2df_v4sf
21856 = build_function_type_list (V2DF_type_node,
21857 V2DF_type_node, V4SF_type_node, NULL_TREE);
21858 tree v2df_ftype_v2df_v2df_int
21859 = build_function_type_list (V2DF_type_node,
21860 V2DF_type_node, V2DF_type_node,
21863 tree v2df_ftype_v2df_pcdouble
21864 = build_function_type_list (V2DF_type_node,
21865 V2DF_type_node, pcdouble_type_node, NULL_TREE);
21866 tree void_ftype_pdouble_v2df
21867 = build_function_type_list (void_type_node,
21868 pdouble_type_node, V2DF_type_node, NULL_TREE);
21869 tree void_ftype_pint_int
21870 = build_function_type_list (void_type_node,
21871 pint_type_node, integer_type_node, NULL_TREE);
21872 tree void_ftype_v16qi_v16qi_pchar
21873 = build_function_type_list (void_type_node,
21874 V16QI_type_node, V16QI_type_node,
21875 pchar_type_node, NULL_TREE);
21876 tree v2df_ftype_pcdouble
21877 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
21878 tree v2df_ftype_v2df_v2df
21879 = build_function_type_list (V2DF_type_node,
21880 V2DF_type_node, V2DF_type_node, NULL_TREE);
21881 tree v16qi_ftype_v16qi_v16qi
21882 = build_function_type_list (V16QI_type_node,
21883 V16QI_type_node, V16QI_type_node, NULL_TREE);
21884 tree v8hi_ftype_v8hi_v8hi
21885 = build_function_type_list (V8HI_type_node,
21886 V8HI_type_node, V8HI_type_node, NULL_TREE);
21887 tree v4si_ftype_v4si_v4si
21888 = build_function_type_list (V4SI_type_node,
21889 V4SI_type_node, V4SI_type_node, NULL_TREE);
21890 tree v2di_ftype_v2di_v2di
21891 = build_function_type_list (V2DI_type_node,
21892 V2DI_type_node, V2DI_type_node, NULL_TREE);
21893 tree v2di_ftype_v2df_v2df
21894 = build_function_type_list (V2DI_type_node,
21895 V2DF_type_node, V2DF_type_node, NULL_TREE);
21896 tree v2df_ftype_v2df
21897 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
21898 tree v2di_ftype_v2di_int
21899 = build_function_type_list (V2DI_type_node,
21900 V2DI_type_node, integer_type_node, NULL_TREE);
21901 tree v2di_ftype_v2di_v2di_int
21902 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21903 V2DI_type_node, integer_type_node, NULL_TREE);
21904 tree v4si_ftype_v4si_int
21905 = build_function_type_list (V4SI_type_node,
21906 V4SI_type_node, integer_type_node, NULL_TREE);
21907 tree v8hi_ftype_v8hi_int
21908 = build_function_type_list (V8HI_type_node,
21909 V8HI_type_node, integer_type_node, NULL_TREE);
21910 tree v4si_ftype_v8hi_v8hi
21911 = build_function_type_list (V4SI_type_node,
21912 V8HI_type_node, V8HI_type_node, NULL_TREE);
21913 tree v1di_ftype_v8qi_v8qi
21914 = build_function_type_list (V1DI_type_node,
21915 V8QI_type_node, V8QI_type_node, NULL_TREE);
21916 tree v1di_ftype_v2si_v2si
21917 = build_function_type_list (V1DI_type_node,
21918 V2SI_type_node, V2SI_type_node, NULL_TREE);
21919 tree v2di_ftype_v16qi_v16qi
21920 = build_function_type_list (V2DI_type_node,
21921 V16QI_type_node, V16QI_type_node, NULL_TREE);
21922 tree v2di_ftype_v4si_v4si
21923 = build_function_type_list (V2DI_type_node,
21924 V4SI_type_node, V4SI_type_node, NULL_TREE);
21925 tree int_ftype_v16qi
21926 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
21927 tree v16qi_ftype_pcchar
21928 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
21929 tree void_ftype_pchar_v16qi
21930 = build_function_type_list (void_type_node,
21931 pchar_type_node, V16QI_type_node, NULL_TREE);
21933 tree v2di_ftype_v2di_unsigned_unsigned
21934 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21935 unsigned_type_node, unsigned_type_node,
21937 tree v2di_ftype_v2di_v2di_unsigned_unsigned
21938 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
21939 unsigned_type_node, unsigned_type_node,
21941 tree v2di_ftype_v2di_v16qi
21942 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
21944 tree v2df_ftype_v2df_v2df_v2df
21945 = build_function_type_list (V2DF_type_node,
21946 V2DF_type_node, V2DF_type_node,
21947 V2DF_type_node, NULL_TREE);
21948 tree v4sf_ftype_v4sf_v4sf_v4sf
21949 = build_function_type_list (V4SF_type_node,
21950 V4SF_type_node, V4SF_type_node,
21951 V4SF_type_node, NULL_TREE);
21952 tree v8hi_ftype_v16qi
21953 = build_function_type_list (V8HI_type_node, V16QI_type_node,
21955 tree v4si_ftype_v16qi
21956 = build_function_type_list (V4SI_type_node, V16QI_type_node,
21958 tree v2di_ftype_v16qi
21959 = build_function_type_list (V2DI_type_node, V16QI_type_node,
21961 tree v4si_ftype_v8hi
21962 = build_function_type_list (V4SI_type_node, V8HI_type_node,
21964 tree v2di_ftype_v8hi
21965 = build_function_type_list (V2DI_type_node, V8HI_type_node,
21967 tree v2di_ftype_v4si
21968 = build_function_type_list (V2DI_type_node, V4SI_type_node,
21970 tree v2di_ftype_pv2di
21971 = build_function_type_list (V2DI_type_node, pv2di_type_node,
21973 tree v16qi_ftype_v16qi_v16qi_int
21974 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21975 V16QI_type_node, integer_type_node,
21977 tree v16qi_ftype_v16qi_v16qi_v16qi
21978 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21979 V16QI_type_node, V16QI_type_node,
21981 tree v8hi_ftype_v8hi_v8hi_int
21982 = build_function_type_list (V8HI_type_node, V8HI_type_node,
21983 V8HI_type_node, integer_type_node,
21985 tree v4si_ftype_v4si_v4si_int
21986 = build_function_type_list (V4SI_type_node, V4SI_type_node,
21987 V4SI_type_node, integer_type_node,
21989 tree int_ftype_v2di_v2di
21990 = build_function_type_list (integer_type_node,
21991 V2DI_type_node, V2DI_type_node,
21993 tree int_ftype_v16qi_int_v16qi_int_int
21994 = build_function_type_list (integer_type_node,
22001 tree v16qi_ftype_v16qi_int_v16qi_int_int
22002 = build_function_type_list (V16QI_type_node,
22009 tree int_ftype_v16qi_v16qi_int
22010 = build_function_type_list (integer_type_node,
22016 /* SSE5 instructions */
22017 tree v2di_ftype_v2di_v2di_v2di
22018 = build_function_type_list (V2DI_type_node,
22024 tree v4si_ftype_v4si_v4si_v4si
22025 = build_function_type_list (V4SI_type_node,
22031 tree v4si_ftype_v4si_v4si_v2di
22032 = build_function_type_list (V4SI_type_node,
22038 tree v8hi_ftype_v8hi_v8hi_v8hi
22039 = build_function_type_list (V8HI_type_node,
22045 tree v8hi_ftype_v8hi_v8hi_v4si
22046 = build_function_type_list (V8HI_type_node,
22052 tree v2df_ftype_v2df_v2df_v16qi
22053 = build_function_type_list (V2DF_type_node,
22059 tree v4sf_ftype_v4sf_v4sf_v16qi
22060 = build_function_type_list (V4SF_type_node,
22066 tree v2di_ftype_v2di_si
22067 = build_function_type_list (V2DI_type_node,
22072 tree v4si_ftype_v4si_si
22073 = build_function_type_list (V4SI_type_node,
22078 tree v8hi_ftype_v8hi_si
22079 = build_function_type_list (V8HI_type_node,
22084 tree v16qi_ftype_v16qi_si
22085 = build_function_type_list (V16QI_type_node,
22089 tree v4sf_ftype_v4hi
22090 = build_function_type_list (V4SF_type_node,
22094 tree v4hi_ftype_v4sf
22095 = build_function_type_list (V4HI_type_node,
22099 tree v2di_ftype_v2di
22100 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22102 tree v16qi_ftype_v8hi_v8hi
22103 = build_function_type_list (V16QI_type_node,
22104 V8HI_type_node, V8HI_type_node,
22106 tree v8hi_ftype_v4si_v4si
22107 = build_function_type_list (V8HI_type_node,
22108 V4SI_type_node, V4SI_type_node,
22110 tree v8hi_ftype_v16qi_v16qi
22111 = build_function_type_list (V8HI_type_node,
22112 V16QI_type_node, V16QI_type_node,
22114 tree v4hi_ftype_v8qi_v8qi
22115 = build_function_type_list (V4HI_type_node,
22116 V8QI_type_node, V8QI_type_node,
22118 tree unsigned_ftype_unsigned_uchar
22119 = build_function_type_list (unsigned_type_node,
22120 unsigned_type_node,
22121 unsigned_char_type_node,
22123 tree unsigned_ftype_unsigned_ushort
22124 = build_function_type_list (unsigned_type_node,
22125 unsigned_type_node,
22126 short_unsigned_type_node,
22128 tree unsigned_ftype_unsigned_unsigned
22129 = build_function_type_list (unsigned_type_node,
22130 unsigned_type_node,
22131 unsigned_type_node,
22133 tree uint64_ftype_uint64_uint64
22134 = build_function_type_list (long_long_unsigned_type_node,
22135 long_long_unsigned_type_node,
22136 long_long_unsigned_type_node,
22138 tree float_ftype_float
22139 = build_function_type_list (float_type_node,
22144 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22146 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22148 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22150 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22152 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22154 tree v8sf_ftype_v8sf
22155 = build_function_type_list (V8SF_type_node,
22158 tree v8si_ftype_v8sf
22159 = build_function_type_list (V8SI_type_node,
22162 tree v8sf_ftype_v8si
22163 = build_function_type_list (V8SF_type_node,
22166 tree v4si_ftype_v4df
22167 = build_function_type_list (V4SI_type_node,
22170 tree v4df_ftype_v4df
22171 = build_function_type_list (V4DF_type_node,
22174 tree v4df_ftype_v4si
22175 = build_function_type_list (V4DF_type_node,
22178 tree v4df_ftype_v4sf
22179 = build_function_type_list (V4DF_type_node,
22182 tree v4sf_ftype_v4df
22183 = build_function_type_list (V4SF_type_node,
22186 tree v8sf_ftype_v8sf_v8sf
22187 = build_function_type_list (V8SF_type_node,
22188 V8SF_type_node, V8SF_type_node,
22190 tree v4df_ftype_v4df_v4df
22191 = build_function_type_list (V4DF_type_node,
22192 V4DF_type_node, V4DF_type_node,
22194 tree v8sf_ftype_v8sf_int
22195 = build_function_type_list (V8SF_type_node,
22196 V8SF_type_node, integer_type_node,
22198 tree v4si_ftype_v8si_int
22199 = build_function_type_list (V4SI_type_node,
22200 V8SI_type_node, integer_type_node,
22202 tree v4df_ftype_v4df_int
22203 = build_function_type_list (V4DF_type_node,
22204 V4DF_type_node, integer_type_node,
22206 tree v4sf_ftype_v8sf_int
22207 = build_function_type_list (V4SF_type_node,
22208 V8SF_type_node, integer_type_node,
22210 tree v2df_ftype_v4df_int
22211 = build_function_type_list (V2DF_type_node,
22212 V4DF_type_node, integer_type_node,
22214 tree v8sf_ftype_v8sf_v8sf_int
22215 = build_function_type_list (V8SF_type_node,
22216 V8SF_type_node, V8SF_type_node,
22219 tree v8sf_ftype_v8sf_v8sf_v8sf
22220 = build_function_type_list (V8SF_type_node,
22221 V8SF_type_node, V8SF_type_node,
22224 tree v4df_ftype_v4df_v4df_v4df
22225 = build_function_type_list (V4DF_type_node,
22226 V4DF_type_node, V4DF_type_node,
22229 tree v8si_ftype_v8si_v8si_int
22230 = build_function_type_list (V8SI_type_node,
22231 V8SI_type_node, V8SI_type_node,
22234 tree v4df_ftype_v4df_v4df_int
22235 = build_function_type_list (V4DF_type_node,
22236 V4DF_type_node, V4DF_type_node,
22239 tree v8sf_ftype_pcfloat
22240 = build_function_type_list (V8SF_type_node,
22243 tree v4df_ftype_pcdouble
22244 = build_function_type_list (V4DF_type_node,
22245 pcdouble_type_node,
22247 tree pcv4sf_type_node
22248 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22249 tree pcv2df_type_node
22250 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22251 tree v8sf_ftype_pcv4sf
22252 = build_function_type_list (V8SF_type_node,
22255 tree v4df_ftype_pcv2df
22256 = build_function_type_list (V4DF_type_node,
22259 tree v32qi_ftype_pcchar
22260 = build_function_type_list (V32QI_type_node,
22263 tree void_ftype_pchar_v32qi
22264 = build_function_type_list (void_type_node,
22265 pchar_type_node, V32QI_type_node,
22267 tree v8si_ftype_v8si_v4si_int
22268 = build_function_type_list (V8SI_type_node,
22269 V8SI_type_node, V4SI_type_node,
22272 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22273 tree void_ftype_pv4di_v4di
22274 = build_function_type_list (void_type_node,
22275 pv4di_type_node, V4DI_type_node,
22277 tree v8sf_ftype_v8sf_v4sf_int
22278 = build_function_type_list (V8SF_type_node,
22279 V8SF_type_node, V4SF_type_node,
22282 tree v4df_ftype_v4df_v2df_int
22283 = build_function_type_list (V4DF_type_node,
22284 V4DF_type_node, V2DF_type_node,
22287 tree void_ftype_pfloat_v8sf
22288 = build_function_type_list (void_type_node,
22289 pfloat_type_node, V8SF_type_node,
22291 tree void_ftype_pdouble_v4df
22292 = build_function_type_list (void_type_node,
22293 pdouble_type_node, V4DF_type_node,
22295 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22296 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22297 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22298 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22299 tree pcv8sf_type_node
22300 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22301 tree pcv4df_type_node
22302 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22303 tree v8sf_ftype_pcv8sf_v8sf
22304 = build_function_type_list (V8SF_type_node,
22305 pcv8sf_type_node, V8SF_type_node,
22307 tree v4df_ftype_pcv4df_v4df
22308 = build_function_type_list (V4DF_type_node,
22309 pcv4df_type_node, V4DF_type_node,
22311 tree v4sf_ftype_pcv4sf_v4sf
22312 = build_function_type_list (V4SF_type_node,
22313 pcv4sf_type_node, V4SF_type_node,
22315 tree v2df_ftype_pcv2df_v2df
22316 = build_function_type_list (V2DF_type_node,
22317 pcv2df_type_node, V2DF_type_node,
22319 tree void_ftype_pv8sf_v8sf_v8sf
22320 = build_function_type_list (void_type_node,
22321 pv8sf_type_node, V8SF_type_node,
22324 tree void_ftype_pv4df_v4df_v4df
22325 = build_function_type_list (void_type_node,
22326 pv4df_type_node, V4DF_type_node,
22329 tree void_ftype_pv4sf_v4sf_v4sf
22330 = build_function_type_list (void_type_node,
22331 pv4sf_type_node, V4SF_type_node,
22334 tree void_ftype_pv2df_v2df_v2df
22335 = build_function_type_list (void_type_node,
22336 pv2df_type_node, V2DF_type_node,
22339 tree v4df_ftype_v2df
22340 = build_function_type_list (V4DF_type_node,
22343 tree v8sf_ftype_v4sf
22344 = build_function_type_list (V8SF_type_node,
22347 tree v8si_ftype_v4si
22348 = build_function_type_list (V8SI_type_node,
22351 tree v2df_ftype_v4df
22352 = build_function_type_list (V2DF_type_node,
22355 tree v4sf_ftype_v8sf
22356 = build_function_type_list (V4SF_type_node,
22359 tree v4si_ftype_v8si
22360 = build_function_type_list (V4SI_type_node,
22363 tree int_ftype_v4df
22364 = build_function_type_list (integer_type_node,
22367 tree int_ftype_v8sf
22368 = build_function_type_list (integer_type_node,
22371 tree int_ftype_v8sf_v8sf
22372 = build_function_type_list (integer_type_node,
22373 V8SF_type_node, V8SF_type_node,
22375 tree int_ftype_v4di_v4di
22376 = build_function_type_list (integer_type_node,
22377 V4DI_type_node, V4DI_type_node,
22379 tree int_ftype_v4df_v4df
22380 = build_function_type_list (integer_type_node,
22381 V4DF_type_node, V4DF_type_node,
22383 tree v8sf_ftype_v8sf_v8si
22384 = build_function_type_list (V8SF_type_node,
22385 V8SF_type_node, V8SI_type_node,
22387 tree v4df_ftype_v4df_v4di
22388 = build_function_type_list (V4DF_type_node,
22389 V4DF_type_node, V4DI_type_node,
22391 tree v4sf_ftype_v4sf_v4si
22392 = build_function_type_list (V4SF_type_node,
22393 V4SF_type_node, V4SI_type_node, NULL_TREE);
22394 tree v2df_ftype_v2df_v2di
22395 = build_function_type_list (V2DF_type_node,
22396 V2DF_type_node, V2DI_type_node, NULL_TREE);
22400 /* Add all special builtins with variable number of operands. */
22401 for (i = 0, d = bdesc_special_args;
22402 i < ARRAY_SIZE (bdesc_special_args);
22410 switch ((enum ix86_special_builtin_type) d->flag)
22412 case VOID_FTYPE_VOID:
22413 type = void_ftype_void;
22415 case V32QI_FTYPE_PCCHAR:
22416 type = v32qi_ftype_pcchar;
22418 case V16QI_FTYPE_PCCHAR:
22419 type = v16qi_ftype_pcchar;
22421 case V8SF_FTYPE_PCV4SF:
22422 type = v8sf_ftype_pcv4sf;
22424 case V8SF_FTYPE_PCFLOAT:
22425 type = v8sf_ftype_pcfloat;
22427 case V4DF_FTYPE_PCV2DF:
22428 type = v4df_ftype_pcv2df;
22430 case V4DF_FTYPE_PCDOUBLE:
22431 type = v4df_ftype_pcdouble;
22433 case V4SF_FTYPE_PCFLOAT:
22434 type = v4sf_ftype_pcfloat;
22436 case V2DI_FTYPE_PV2DI:
22437 type = v2di_ftype_pv2di;
22439 case V2DF_FTYPE_PCDOUBLE:
22440 type = v2df_ftype_pcdouble;
22442 case V8SF_FTYPE_PCV8SF_V8SF:
22443 type = v8sf_ftype_pcv8sf_v8sf;
22445 case V4DF_FTYPE_PCV4DF_V4DF:
22446 type = v4df_ftype_pcv4df_v4df;
22448 case V4SF_FTYPE_V4SF_PCV2SF:
22449 type = v4sf_ftype_v4sf_pcv2sf;
22451 case V4SF_FTYPE_PCV4SF_V4SF:
22452 type = v4sf_ftype_pcv4sf_v4sf;
22454 case V2DF_FTYPE_V2DF_PCDOUBLE:
22455 type = v2df_ftype_v2df_pcdouble;
22457 case V2DF_FTYPE_PCV2DF_V2DF:
22458 type = v2df_ftype_pcv2df_v2df;
22460 case VOID_FTYPE_PV2SF_V4SF:
22461 type = void_ftype_pv2sf_v4sf;
22463 case VOID_FTYPE_PV4DI_V4DI:
22464 type = void_ftype_pv4di_v4di;
22466 case VOID_FTYPE_PV2DI_V2DI:
22467 type = void_ftype_pv2di_v2di;
22469 case VOID_FTYPE_PCHAR_V32QI:
22470 type = void_ftype_pchar_v32qi;
22472 case VOID_FTYPE_PCHAR_V16QI:
22473 type = void_ftype_pchar_v16qi;
22475 case VOID_FTYPE_PFLOAT_V8SF:
22476 type = void_ftype_pfloat_v8sf;
22478 case VOID_FTYPE_PFLOAT_V4SF:
22479 type = void_ftype_pfloat_v4sf;
22481 case VOID_FTYPE_PDOUBLE_V4DF:
22482 type = void_ftype_pdouble_v4df;
22484 case VOID_FTYPE_PDOUBLE_V2DF:
22485 type = void_ftype_pdouble_v2df;
22487 case VOID_FTYPE_PDI_DI:
22488 type = void_ftype_pdi_di;
22490 case VOID_FTYPE_PINT_INT:
22491 type = void_ftype_pint_int;
22493 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22494 type = void_ftype_pv8sf_v8sf_v8sf;
22496 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22497 type = void_ftype_pv4df_v4df_v4df;
22499 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22500 type = void_ftype_pv4sf_v4sf_v4sf;
22502 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22503 type = void_ftype_pv2df_v2df_v2df;
22506 gcc_unreachable ();
22509 def_builtin (d->mask, d->name, type, d->code);
22512 /* Add all builtins with variable number of operands. */
22513 for (i = 0, d = bdesc_args;
22514 i < ARRAY_SIZE (bdesc_args);
22522 switch ((enum ix86_builtin_type) d->flag)
22524 case FLOAT_FTYPE_FLOAT:
22525 type = float_ftype_float;
22527 case INT_FTYPE_V8SF_V8SF_PTEST:
22528 type = int_ftype_v8sf_v8sf;
22530 case INT_FTYPE_V4DI_V4DI_PTEST:
22531 type = int_ftype_v4di_v4di;
22533 case INT_FTYPE_V4DF_V4DF_PTEST:
22534 type = int_ftype_v4df_v4df;
22536 case INT_FTYPE_V4SF_V4SF_PTEST:
22537 type = int_ftype_v4sf_v4sf;
22539 case INT_FTYPE_V2DI_V2DI_PTEST:
22540 type = int_ftype_v2di_v2di;
22542 case INT_FTYPE_V2DF_V2DF_PTEST:
22543 type = int_ftype_v2df_v2df;
22545 case INT64_FTYPE_V4SF:
22546 type = int64_ftype_v4sf;
22548 case INT64_FTYPE_V2DF:
22549 type = int64_ftype_v2df;
22551 case INT_FTYPE_V16QI:
22552 type = int_ftype_v16qi;
22554 case INT_FTYPE_V8QI:
22555 type = int_ftype_v8qi;
22557 case INT_FTYPE_V8SF:
22558 type = int_ftype_v8sf;
22560 case INT_FTYPE_V4DF:
22561 type = int_ftype_v4df;
22563 case INT_FTYPE_V4SF:
22564 type = int_ftype_v4sf;
22566 case INT_FTYPE_V2DF:
22567 type = int_ftype_v2df;
22569 case V16QI_FTYPE_V16QI:
22570 type = v16qi_ftype_v16qi;
22572 case V8SI_FTYPE_V8SF:
22573 type = v8si_ftype_v8sf;
22575 case V8SI_FTYPE_V4SI:
22576 type = v8si_ftype_v4si;
22578 case V8HI_FTYPE_V8HI:
22579 type = v8hi_ftype_v8hi;
22581 case V8HI_FTYPE_V16QI:
22582 type = v8hi_ftype_v16qi;
22584 case V8QI_FTYPE_V8QI:
22585 type = v8qi_ftype_v8qi;
22587 case V8SF_FTYPE_V8SF:
22588 type = v8sf_ftype_v8sf;
22590 case V8SF_FTYPE_V8SI:
22591 type = v8sf_ftype_v8si;
22593 case V8SF_FTYPE_V4SF:
22594 type = v8sf_ftype_v4sf;
22596 case V4SI_FTYPE_V4DF:
22597 type = v4si_ftype_v4df;
22599 case V4SI_FTYPE_V4SI:
22600 type = v4si_ftype_v4si;
22602 case V4SI_FTYPE_V16QI:
22603 type = v4si_ftype_v16qi;
22605 case V4SI_FTYPE_V8SI:
22606 type = v4si_ftype_v8si;
22608 case V4SI_FTYPE_V8HI:
22609 type = v4si_ftype_v8hi;
22611 case V4SI_FTYPE_V4SF:
22612 type = v4si_ftype_v4sf;
22614 case V4SI_FTYPE_V2DF:
22615 type = v4si_ftype_v2df;
22617 case V4HI_FTYPE_V4HI:
22618 type = v4hi_ftype_v4hi;
22620 case V4DF_FTYPE_V4DF:
22621 type = v4df_ftype_v4df;
22623 case V4DF_FTYPE_V4SI:
22624 type = v4df_ftype_v4si;
22626 case V4DF_FTYPE_V4SF:
22627 type = v4df_ftype_v4sf;
22629 case V4DF_FTYPE_V2DF:
22630 type = v4df_ftype_v2df;
22632 case V4SF_FTYPE_V4SF:
22633 case V4SF_FTYPE_V4SF_VEC_MERGE:
22634 type = v4sf_ftype_v4sf;
22636 case V4SF_FTYPE_V8SF:
22637 type = v4sf_ftype_v8sf;
22639 case V4SF_FTYPE_V4SI:
22640 type = v4sf_ftype_v4si;
22642 case V4SF_FTYPE_V4DF:
22643 type = v4sf_ftype_v4df;
22645 case V4SF_FTYPE_V2DF:
22646 type = v4sf_ftype_v2df;
22648 case V2DI_FTYPE_V2DI:
22649 type = v2di_ftype_v2di;
22651 case V2DI_FTYPE_V16QI:
22652 type = v2di_ftype_v16qi;
22654 case V2DI_FTYPE_V8HI:
22655 type = v2di_ftype_v8hi;
22657 case V2DI_FTYPE_V4SI:
22658 type = v2di_ftype_v4si;
22660 case V2SI_FTYPE_V2SI:
22661 type = v2si_ftype_v2si;
22663 case V2SI_FTYPE_V4SF:
22664 type = v2si_ftype_v4sf;
22666 case V2SI_FTYPE_V2DF:
22667 type = v2si_ftype_v2df;
22669 case V2SI_FTYPE_V2SF:
22670 type = v2si_ftype_v2sf;
22672 case V2DF_FTYPE_V4DF:
22673 type = v2df_ftype_v4df;
22675 case V2DF_FTYPE_V4SF:
22676 type = v2df_ftype_v4sf;
22678 case V2DF_FTYPE_V2DF:
22679 case V2DF_FTYPE_V2DF_VEC_MERGE:
22680 type = v2df_ftype_v2df;
22682 case V2DF_FTYPE_V2SI:
22683 type = v2df_ftype_v2si;
22685 case V2DF_FTYPE_V4SI:
22686 type = v2df_ftype_v4si;
22688 case V2SF_FTYPE_V2SF:
22689 type = v2sf_ftype_v2sf;
22691 case V2SF_FTYPE_V2SI:
22692 type = v2sf_ftype_v2si;
22694 case V16QI_FTYPE_V16QI_V16QI:
22695 type = v16qi_ftype_v16qi_v16qi;
22697 case V16QI_FTYPE_V8HI_V8HI:
22698 type = v16qi_ftype_v8hi_v8hi;
22700 case V8QI_FTYPE_V8QI_V8QI:
22701 type = v8qi_ftype_v8qi_v8qi;
22703 case V8QI_FTYPE_V4HI_V4HI:
22704 type = v8qi_ftype_v4hi_v4hi;
22706 case V8HI_FTYPE_V8HI_V8HI:
22707 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22708 type = v8hi_ftype_v8hi_v8hi;
22710 case V8HI_FTYPE_V16QI_V16QI:
22711 type = v8hi_ftype_v16qi_v16qi;
22713 case V8HI_FTYPE_V4SI_V4SI:
22714 type = v8hi_ftype_v4si_v4si;
22716 case V8HI_FTYPE_V8HI_SI_COUNT:
22717 type = v8hi_ftype_v8hi_int;
22719 case V8SF_FTYPE_V8SF_V8SF:
22720 type = v8sf_ftype_v8sf_v8sf;
22722 case V8SF_FTYPE_V8SF_V8SI:
22723 type = v8sf_ftype_v8sf_v8si;
22725 case V4SI_FTYPE_V4SI_V4SI:
22726 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22727 type = v4si_ftype_v4si_v4si;
22729 case V4SI_FTYPE_V8HI_V8HI:
22730 type = v4si_ftype_v8hi_v8hi;
22732 case V4SI_FTYPE_V4SF_V4SF:
22733 type = v4si_ftype_v4sf_v4sf;
22735 case V4SI_FTYPE_V2DF_V2DF:
22736 type = v4si_ftype_v2df_v2df;
22738 case V4SI_FTYPE_V4SI_SI_COUNT:
22739 type = v4si_ftype_v4si_int;
22741 case V4HI_FTYPE_V4HI_V4HI:
22742 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22743 type = v4hi_ftype_v4hi_v4hi;
22745 case V4HI_FTYPE_V8QI_V8QI:
22746 type = v4hi_ftype_v8qi_v8qi;
22748 case V4HI_FTYPE_V2SI_V2SI:
22749 type = v4hi_ftype_v2si_v2si;
22751 case V4HI_FTYPE_V4HI_SI_COUNT:
22752 type = v4hi_ftype_v4hi_int;
22754 case V4DF_FTYPE_V4DF_V4DF:
22755 type = v4df_ftype_v4df_v4df;
22757 case V4DF_FTYPE_V4DF_V4DI:
22758 type = v4df_ftype_v4df_v4di;
22760 case V4SF_FTYPE_V4SF_V4SF:
22761 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22762 type = v4sf_ftype_v4sf_v4sf;
22764 case V4SF_FTYPE_V4SF_V4SI:
22765 type = v4sf_ftype_v4sf_v4si;
22767 case V4SF_FTYPE_V4SF_V2SI:
22768 type = v4sf_ftype_v4sf_v2si;
22770 case V4SF_FTYPE_V4SF_V2DF:
22771 type = v4sf_ftype_v4sf_v2df;
22773 case V4SF_FTYPE_V4SF_DI:
22774 type = v4sf_ftype_v4sf_int64;
22776 case V4SF_FTYPE_V4SF_SI:
22777 type = v4sf_ftype_v4sf_int;
22779 case V2DI_FTYPE_V2DI_V2DI:
22780 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22781 type = v2di_ftype_v2di_v2di;
22783 case V2DI_FTYPE_V16QI_V16QI:
22784 type = v2di_ftype_v16qi_v16qi;
22786 case V2DI_FTYPE_V4SI_V4SI:
22787 type = v2di_ftype_v4si_v4si;
22789 case V2DI_FTYPE_V2DI_V16QI:
22790 type = v2di_ftype_v2di_v16qi;
22792 case V2DI_FTYPE_V2DF_V2DF:
22793 type = v2di_ftype_v2df_v2df;
22795 case V2DI_FTYPE_V2DI_SI_COUNT:
22796 type = v2di_ftype_v2di_int;
22798 case V2SI_FTYPE_V2SI_V2SI:
22799 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22800 type = v2si_ftype_v2si_v2si;
22802 case V2SI_FTYPE_V4HI_V4HI:
22803 type = v2si_ftype_v4hi_v4hi;
22805 case V2SI_FTYPE_V2SF_V2SF:
22806 type = v2si_ftype_v2sf_v2sf;
22808 case V2SI_FTYPE_V2SI_SI_COUNT:
22809 type = v2si_ftype_v2si_int;
22811 case V2DF_FTYPE_V2DF_V2DF:
22812 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22813 type = v2df_ftype_v2df_v2df;
22815 case V2DF_FTYPE_V2DF_V4SF:
22816 type = v2df_ftype_v2df_v4sf;
22818 case V2DF_FTYPE_V2DF_V2DI:
22819 type = v2df_ftype_v2df_v2di;
22821 case V2DF_FTYPE_V2DF_DI:
22822 type = v2df_ftype_v2df_int64;
22824 case V2DF_FTYPE_V2DF_SI:
22825 type = v2df_ftype_v2df_int;
22827 case V2SF_FTYPE_V2SF_V2SF:
22828 type = v2sf_ftype_v2sf_v2sf;
22830 case V1DI_FTYPE_V1DI_V1DI:
22831 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22832 type = v1di_ftype_v1di_v1di;
22834 case V1DI_FTYPE_V8QI_V8QI:
22835 type = v1di_ftype_v8qi_v8qi;
22837 case V1DI_FTYPE_V2SI_V2SI:
22838 type = v1di_ftype_v2si_v2si;
22840 case V1DI_FTYPE_V1DI_SI_COUNT:
22841 type = v1di_ftype_v1di_int;
22843 case UINT64_FTYPE_UINT64_UINT64:
22844 type = uint64_ftype_uint64_uint64;
22846 case UINT_FTYPE_UINT_UINT:
22847 type = unsigned_ftype_unsigned_unsigned;
22849 case UINT_FTYPE_UINT_USHORT:
22850 type = unsigned_ftype_unsigned_ushort;
22852 case UINT_FTYPE_UINT_UCHAR:
22853 type = unsigned_ftype_unsigned_uchar;
22855 case V8HI_FTYPE_V8HI_INT:
22856 type = v8hi_ftype_v8hi_int;
22858 case V8SF_FTYPE_V8SF_INT:
22859 type = v8sf_ftype_v8sf_int;
22861 case V4SI_FTYPE_V4SI_INT:
22862 type = v4si_ftype_v4si_int;
22864 case V4SI_FTYPE_V8SI_INT:
22865 type = v4si_ftype_v8si_int;
22867 case V4HI_FTYPE_V4HI_INT:
22868 type = v4hi_ftype_v4hi_int;
22870 case V4DF_FTYPE_V4DF_INT:
22871 type = v4df_ftype_v4df_int;
22873 case V4SF_FTYPE_V4SF_INT:
22874 type = v4sf_ftype_v4sf_int;
22876 case V4SF_FTYPE_V8SF_INT:
22877 type = v4sf_ftype_v8sf_int;
22879 case V2DI_FTYPE_V2DI_INT:
22880 case V2DI2TI_FTYPE_V2DI_INT:
22881 type = v2di_ftype_v2di_int;
22883 case V2DF_FTYPE_V2DF_INT:
22884 type = v2df_ftype_v2df_int;
22886 case V2DF_FTYPE_V4DF_INT:
22887 type = v2df_ftype_v4df_int;
22889 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22890 type = v16qi_ftype_v16qi_v16qi_v16qi;
22892 case V8SF_FTYPE_V8SF_V8SF_V8SF:
22893 type = v8sf_ftype_v8sf_v8sf_v8sf;
22895 case V4DF_FTYPE_V4DF_V4DF_V4DF:
22896 type = v4df_ftype_v4df_v4df_v4df;
22898 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22899 type = v4sf_ftype_v4sf_v4sf_v4sf;
22901 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22902 type = v2df_ftype_v2df_v2df_v2df;
22904 case V16QI_FTYPE_V16QI_V16QI_INT:
22905 type = v16qi_ftype_v16qi_v16qi_int;
22907 case V8SI_FTYPE_V8SI_V8SI_INT:
22908 type = v8si_ftype_v8si_v8si_int;
22910 case V8SI_FTYPE_V8SI_V4SI_INT:
22911 type = v8si_ftype_v8si_v4si_int;
22913 case V8HI_FTYPE_V8HI_V8HI_INT:
22914 type = v8hi_ftype_v8hi_v8hi_int;
22916 case V8SF_FTYPE_V8SF_V8SF_INT:
22917 type = v8sf_ftype_v8sf_v8sf_int;
22919 case V8SF_FTYPE_V8SF_V4SF_INT:
22920 type = v8sf_ftype_v8sf_v4sf_int;
22922 case V4SI_FTYPE_V4SI_V4SI_INT:
22923 type = v4si_ftype_v4si_v4si_int;
22925 case V4DF_FTYPE_V4DF_V4DF_INT:
22926 type = v4df_ftype_v4df_v4df_int;
22928 case V4DF_FTYPE_V4DF_V2DF_INT:
22929 type = v4df_ftype_v4df_v2df_int;
22931 case V4SF_FTYPE_V4SF_V4SF_INT:
22932 type = v4sf_ftype_v4sf_v4sf_int;
22934 case V2DI_FTYPE_V2DI_V2DI_INT:
22935 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22936 type = v2di_ftype_v2di_v2di_int;
22938 case V2DF_FTYPE_V2DF_V2DF_INT:
22939 type = v2df_ftype_v2df_v2df_int;
22941 case V2DI_FTYPE_V2DI_UINT_UINT:
22942 type = v2di_ftype_v2di_unsigned_unsigned;
22944 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22945 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
22947 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22948 type = v1di_ftype_v1di_v1di_int;
22951 gcc_unreachable ();
22954 def_builtin_const (d->mask, d->name, type, d->code);
22957 /* pcmpestr[im] insns. */
22958 for (i = 0, d = bdesc_pcmpestr;
22959 i < ARRAY_SIZE (bdesc_pcmpestr);
22962 if (d->code == IX86_BUILTIN_PCMPESTRM128)
22963 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
22965 ftype = int_ftype_v16qi_int_v16qi_int_int;
22966 def_builtin_const (d->mask, d->name, ftype, d->code);
22969 /* pcmpistr[im] insns. */
22970 for (i = 0, d = bdesc_pcmpistr;
22971 i < ARRAY_SIZE (bdesc_pcmpistr);
22974 if (d->code == IX86_BUILTIN_PCMPISTRM128)
22975 ftype = v16qi_ftype_v16qi_v16qi_int;
22977 ftype = int_ftype_v16qi_v16qi_int;
22978 def_builtin_const (d->mask, d->name, ftype, d->code);
22981 /* comi/ucomi insns. */
22982 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22983 if (d->mask == OPTION_MASK_ISA_SSE2)
22984 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
22986 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
22989 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
22990 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
22992 /* SSE or 3DNow!A */
22993 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
22996 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
22998 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
22999 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23002 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23003 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23006 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23007 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23008 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23009 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23010 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23011 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23014 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23017 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23018 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23020 /* Access to the vec_init patterns. */
23021 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23022 integer_type_node, NULL_TREE);
23023 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23025 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23026 short_integer_type_node,
23027 short_integer_type_node,
23028 short_integer_type_node, NULL_TREE);
23029 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23031 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23032 char_type_node, char_type_node,
23033 char_type_node, char_type_node,
23034 char_type_node, char_type_node,
23035 char_type_node, NULL_TREE);
23036 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23038 /* Access to the vec_extract patterns. */
23039 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23040 integer_type_node, NULL_TREE);
23041 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23043 ftype = build_function_type_list (long_long_integer_type_node,
23044 V2DI_type_node, integer_type_node,
23046 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23048 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23049 integer_type_node, NULL_TREE);
23050 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23052 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23053 integer_type_node, NULL_TREE);
23054 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23056 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23057 integer_type_node, NULL_TREE);
23058 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23060 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23061 integer_type_node, NULL_TREE);
23062 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23064 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23065 integer_type_node, NULL_TREE);
23066 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23068 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23069 integer_type_node, NULL_TREE);
23070 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23072 /* Access to the vec_set patterns. */
23073 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23075 integer_type_node, NULL_TREE);
23076 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23078 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23080 integer_type_node, NULL_TREE);
23081 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23083 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23085 integer_type_node, NULL_TREE);
23086 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23088 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23090 integer_type_node, NULL_TREE);
23091 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23093 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23095 integer_type_node, NULL_TREE);
23096 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23098 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23100 integer_type_node, NULL_TREE);
23101 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23103 /* Add SSE5 multi-arg argument instructions */
23104 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23106 tree mtype = NULL_TREE;
23111 switch ((enum multi_arg_type)d->flag)
23113 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23114 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23115 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23116 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23117 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23118 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23119 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23120 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23121 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23122 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23123 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23124 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23125 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23126 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23127 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23128 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23129 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23130 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23131 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23132 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23133 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23134 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23135 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23136 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23137 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23138 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23139 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23140 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23141 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23142 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23143 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23144 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23145 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23146 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23147 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23148 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23149 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23150 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23151 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23152 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23153 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23154 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23155 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23156 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23157 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23158 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23159 case MULTI_ARG_UNKNOWN:
23161 gcc_unreachable ();
23165 def_builtin_const (d->mask, d->name, mtype, d->code);
23169 /* Internal method for ix86_init_builtins. */
/* Register the __builtin_ms_va_* and __builtin_sysv_va_* builtins so that
   code can name either variable-argument ABI explicitly.  Each builtin is
   created with an "ms_abi" or "sysv_abi" attribute list so the expanders
   pick the matching calling convention, and all six reuse the generic
   BUILT_IN_VA_START / VA_END / VA_COPY codes.
   NOTE(review): this excerpt elides some lines of the original function
   (return type, braces, trailing arguments of some calls).  */
23172 ix86_init_builtins_va_builtins_abi (void)
23174 tree ms_va_ref, sysv_va_ref;
23175 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23176 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23177 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23178 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists that tag each builtin with its ABI.  */
23182 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23183 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* The ms va_list is taken by reference; the sysv reference is built as a
   pointer to the element type of the sysv va_list type.  */
23184 ms_va_ref = build_reference_type (ms_va_list_type_node);
23186 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Function types: va_end/va_copy are ordinary function types, the
   va_start variants are varargs function types.  */
23189 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23190 fnvoid_va_start_ms =
23191 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23192 fnvoid_va_end_sysv =
23193 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23194 fnvoid_va_start_sysv =
23195 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23197 fnvoid_va_copy_ms =
23198 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23200 fnvoid_va_copy_sysv =
23201 build_function_type_list (void_type_node, sysv_va_ref,
23202 sysv_va_ref, NULL_TREE);
/* Register all six builtins, attaching the ABI attribute built above.  */
23204 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23205 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23206 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23207 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23208 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23209 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23210 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23211 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23212 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23213 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23214 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23215 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Target hook: create all ix86 builtin functions and types.  Registers the
   __float80 and __float128 types, the TFmode helper builtins (__builtin_infq,
   __builtin_fabsq, __builtin_copysignq), then delegates to
   ix86_init_mmx_sse_builtins and ix86_init_builtins_va_builtins_abi.
   NOTE(review): several lines (return type, braces, string-name arguments of
   register_builtin_type, trailing NULL_TREEs) are elided in this excerpt.  */
23219 ix86_init_builtins (void)
23221 tree float128_type_node = make_node (REAL_TYPE);
/* If long double already has XFmode, reuse it as __float80; otherwise a
   distinct 80-bit REAL_TYPE is created and laid out below.  */
23224 /* The __float80 type. */
23225 if (TYPE_MODE (long_double_type_node) == XFmode)
23226 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23230 /* The __float80 type. */
23231 tree float80_type_node = make_node (REAL_TYPE);
23233 TYPE_PRECISION (float80_type_node) = 80;
23234 layout_type (float80_type_node);
23235 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23239 /* The __float128 type. */
23240 TYPE_PRECISION (float128_type_node) = 128;
23241 layout_type (float128_type_node);
23242 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23245 /* TFmode support builtins. */
23246 ftype = build_function_type (float128_type_node, void_list_node);
23247 decl = add_builtin_function ("__builtin_infq", ftype,
23248 IX86_BUILTIN_INFQ, BUILT_IN_MD,
/* Record the decl so ix86_expand_builtin can find it by code.  */
23250 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23252 /* We will expand them to normal call if SSE2 isn't available since
23253 they are used by libgcc. */
23254 ftype = build_function_type_list (float128_type_node,
23255 float128_type_node,
/* __builtin_fabsq falls back to the libgcc routine __fabstf2.  */
23257 decl = add_builtin_function ("__builtin_fabsq", ftype,
23258 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23259 "__fabstf2", NULL_TREE);
23260 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23261 TREE_READONLY (decl) = 1;
23263 ftype = build_function_type_list (float128_type_node,
23264 float128_type_node,
23265 float128_type_node,
/* __builtin_copysignq falls back to __copysigntf3.  */
23267 decl = add_builtin_function ("__builtin_copysignq", ftype,
23268 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23269 "__copysigntf3", NULL_TREE);
23270 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23271 TREE_READONLY (decl) = 1;
/* Bulk registration of MMX/SSE/AVX/SSE5 builtins, then the explicit
   per-ABI va_* builtins.  */
23273 ix86_init_mmx_sse_builtins ();
23275 ix86_init_builtins_va_builtins_abi ();
23278 /* Errors in the source file can cause expand_expr to return const0_rtx
23279 where we expect a vector. To avoid crashing, use one of the vector
23280 clear instructions. */
/* Map a scalar const0_rtx to the all-zero constant of vector MODE; any
   other X is presumably returned unchanged (the return statement is elided
   in this excerpt).  */
23282 safe_vector_operand (rtx x, enum machine_mode mode)
23284 if (x == const0_rtx)
23285 x = CONST0_RTX (mode);
23289 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin EXP through insn pattern ICODE, emitting the
   result into TARGET (a fresh register is allocated when TARGET is missing,
   has the wrong mode, or fails the operand-0 predicate).
   NOTE(review): braces and the final "return target"/error path are elided
   in this excerpt.  */
23292 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23295 tree arg0 = CALL_EXPR_ARG (exp, 0);
23296 tree arg1 = CALL_EXPR_ARG (exp, 1);
23297 rtx op0 = expand_normal (arg0);
23298 rtx op1 = expand_normal (arg1);
/* Modes required by the insn pattern for the result and both inputs.  */
23299 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23300 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23301 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx leaking in where a vector is expected.  */
23303 if (VECTOR_MODE_P (mode0))
23304 op0 = safe_vector_operand (op0, mode0);
23305 if (VECTOR_MODE_P (mode1))
23306 op1 = safe_vector_operand (op1, mode1);
23308 if (optimize || !target
23309 || GET_MODE (target) != tmode
23310 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23311 target = gen_reg_rtx (tmode);
/* An SImode second operand feeding a TImode pattern is widened via an
   SSE2 scalar load into V4SImode and re-viewed as TImode.  */
23313 if (GET_MODE (op1) == SImode && mode1 == TImode)
23315 rtx x = gen_reg_rtx (V4SImode);
23316 emit_insn (gen_sse2_loadd (x, op1));
23317 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the pattern predicates reject
   them as-is.  */
23320 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23321 op0 = copy_to_mode_reg (mode0, op0);
23322 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23323 op1 = copy_to_mode_reg (mode1, op1);
23325 pat = GEN_FCN (icode) (target, op0, op1);
23334 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand an SSE5 multi-argument builtin EXP using insn pattern ICODE.
   M_TYPE classifies the builtin (arity, immediate last argument,
   comparison, test form); SUB_CODE is the rtx comparison code used when
   the builtin is a comparison.  Result goes to TARGET, reallocated when
   unsuitable.  NOTE(review): braces, nargs/tf_p assignments and break
   statements inside the switch are elided in this excerpt.  */
23337 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23338 enum multi_arg_type m_type,
23339 enum insn_code sub_code)
23344 bool comparison_p = false;
23346 bool last_arg_constant = false;
23347 int num_memory = 0;
23350 enum machine_mode mode;
23353 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Three-operand forms.  */
23357 case MULTI_ARG_3_SF:
23358 case MULTI_ARG_3_DF:
23359 case MULTI_ARG_3_DI:
23360 case MULTI_ARG_3_SI:
23361 case MULTI_ARG_3_SI_DI:
23362 case MULTI_ARG_3_HI:
23363 case MULTI_ARG_3_HI_SI:
23364 case MULTI_ARG_3_QI:
23365 case MULTI_ARG_3_PERMPS:
23366 case MULTI_ARG_3_PERMPD:
/* Plain two-operand forms.  */
23370 case MULTI_ARG_2_SF:
23371 case MULTI_ARG_2_DF:
23372 case MULTI_ARG_2_DI:
23373 case MULTI_ARG_2_SI:
23374 case MULTI_ARG_2_HI:
23375 case MULTI_ARG_2_QI:
/* Two-operand forms whose last argument must be an immediate.  */
23379 case MULTI_ARG_2_DI_IMM:
23380 case MULTI_ARG_2_SI_IMM:
23381 case MULTI_ARG_2_HI_IMM:
23382 case MULTI_ARG_2_QI_IMM:
23384 last_arg_constant = true;
/* One-operand forms (including widening conversions and PH<->PS).  */
23387 case MULTI_ARG_1_SF:
23388 case MULTI_ARG_1_DF:
23389 case MULTI_ARG_1_DI:
23390 case MULTI_ARG_1_SI:
23391 case MULTI_ARG_1_HI:
23392 case MULTI_ARG_1_QI:
23393 case MULTI_ARG_1_SI_DI:
23394 case MULTI_ARG_1_HI_DI:
23395 case MULTI_ARG_1_HI_SI:
23396 case MULTI_ARG_1_QI_DI:
23397 case MULTI_ARG_1_QI_SI:
23398 case MULTI_ARG_1_QI_HI:
23399 case MULTI_ARG_1_PH2PS:
23400 case MULTI_ARG_1_PS2PH:
/* Comparison forms: the pattern takes an extra comparison rtx built
   from SUB_CODE (see the comparison_p branch below).  */
23404 case MULTI_ARG_2_SF_CMP:
23405 case MULTI_ARG_2_DF_CMP:
23406 case MULTI_ARG_2_DI_CMP:
23407 case MULTI_ARG_2_SI_CMP:
23408 case MULTI_ARG_2_HI_CMP:
23409 case MULTI_ARG_2_QI_CMP:
23411 comparison_p = true;
/* "Test" forms: SUB_CODE is passed to the pattern as an integer.  */
23414 case MULTI_ARG_2_SF_TF:
23415 case MULTI_ARG_2_DF_TF:
23416 case MULTI_ARG_2_DI_TF:
23417 case MULTI_ARG_2_SI_TF:
23418 case MULTI_ARG_2_HI_TF:
23419 case MULTI_ARG_2_QI_TF:
23424 case MULTI_ARG_UNKNOWN:
23426 gcc_unreachable ();
23429 if (optimize || !target
23430 || GET_MODE (target) != tmode
23431 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23432 target = gen_reg_rtx (tmode);
23434 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  For comparisons the operand
   index is shifted by one because operand 1 of the pattern is the
   comparison rtx itself.  */
23436 for (i = 0; i < nargs; i++)
23438 tree arg = CALL_EXPR_ARG (exp, i);
23439 rtx op = expand_normal (arg);
23440 int adjust = (comparison_p) ? 1 : 0;
23441 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
/* Immediate-form builtins require a literal constant last argument.  */
23443 if (last_arg_constant && i == nargs-1)
23445 if (GET_CODE (op) != CONST_INT)
23447 error ("last argument must be an immediate");
23448 return gen_reg_rtx (tmode);
23453 if (VECTOR_MODE_P (mode))
23454 op = safe_vector_operand (op, mode);
23456 /* If we aren't optimizing, only allow one memory operand to be
23458 if (memory_operand (op, mode))
23461 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23464 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23466 op = force_reg (mode, op);
23470 args[i].mode = mode;
/* Emit the pattern for the decoded arity/comparison shape.  */
23476 pat = GEN_FCN (icode) (target, args[0].op);
23481 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23482 GEN_INT ((int)sub_code));
23483 else if (! comparison_p)
23484 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23487 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23491 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23496 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23500 gcc_unreachable ();
23510 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23511 insns with vec_merge. */
/* Expand a one-argument builtin whose pattern is a scalar operation merged
   into a vector: the single source OP0 is used for both pattern inputs
   (op1 is presumably set to op0 on an elided line — the assignment between
   lines 23533 and 23536 is not visible in this excerpt).  */
23514 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23518 tree arg0 = CALL_EXPR_ARG (exp, 0);
23519 rtx op1, op0 = expand_normal (arg0);
23520 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23521 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Allocate a fresh result register unless TARGET already fits.  */
23523 if (optimize || !target
23524 || GET_MODE (target) != tmode
23525 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23526 target = gen_reg_rtx (tmode);
23528 if (VECTOR_MODE_P (mode0))
23529 op0 = safe_vector_operand (op0, mode0);
23531 if ((optimize && !register_operand (op0, mode0))
23532 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23533 op0 = copy_to_mode_reg (mode0, op0);
23536 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23537 op1 = copy_to_mode_reg (mode0, op1);
23539 pat = GEN_FCN (icode) (target, op0, op1);
23546 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D.  Builds a comparison rtx
   from d->comparison and emits the pattern (target, op0, op1, cmp).
   SWAP requests exchanging the operands for comparisons that only exist
   in one direction in hardware.  NOTE(review): the swap branch and the
   trailing return are partially elided in this excerpt.  */
23549 ix86_expand_sse_compare (const struct builtin_description *d,
23550 tree exp, rtx target, bool swap)
23553 tree arg0 = CALL_EXPR_ARG (exp, 0);
23554 tree arg1 = CALL_EXPR_ARG (exp, 1);
23555 rtx op0 = expand_normal (arg0);
23556 rtx op1 = expand_normal (arg1);
23558 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23559 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23560 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23561 enum rtx_code comparison = d->comparison;
23563 if (VECTOR_MODE_P (mode0))
23564 op0 = safe_vector_operand (op0, mode0);
23565 if (VECTOR_MODE_P (mode1))
23566 op1 = safe_vector_operand (op1, mode1);
23568 /* Swap operands if we have a comparison that isn't available in
23572 rtx tmp = gen_reg_rtx (mode1);
23573 emit_move_insn (tmp, op1);
23578 if (optimize || !target
23579 || GET_MODE (target) != tmode
23580 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23581 target = gen_reg_rtx (tmode);
/* Legitimize both operands against the pattern predicates.  */
23583 if ((optimize && !register_operand (op0, mode0))
23584 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23585 op0 = copy_to_mode_reg (mode0, op0);
23586 if ((optimize && !register_operand (op1, mode1))
23587 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23588 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison itself is passed to the pattern as an rtx operand.  */
23590 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23591 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23598 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comi/ucomi builtin described by D: emit the compare pattern,
   then materialize the flags result as 0/1 by setting the low QImode part
   of an SImode pseudo via a STRICT_LOW_PART store of the comparison.
   Returns the SImode register underlying the QImode subreg.
   NOTE(review): braces and parts of the operand-swap branch are elided
   in this excerpt.  */
23601 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23605 tree arg0 = CALL_EXPR_ARG (exp, 0);
23606 tree arg1 = CALL_EXPR_ARG (exp, 1);
23607 rtx op0 = expand_normal (arg0);
23608 rtx op1 = expand_normal (arg1);
23609 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23610 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23611 enum rtx_code comparison = d->comparison;
23613 if (VECTOR_MODE_P (mode0))
23614 op0 = safe_vector_operand (op0, mode0);
23615 if (VECTOR_MODE_P (mode1))
23616 op1 = safe_vector_operand (op1, mode1);
23618 /* Swap operands if we have a comparison that isn't available in
23620 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the whole SImode result first so the high bits are defined, then
   work through a QImode subreg of it.  */
23627 target = gen_reg_rtx (SImode);
23628 emit_move_insn (target, const0_rtx);
23629 target = gen_rtx_SUBREG (QImode, target, 0);
23631 if ((optimize && !register_operand (op0, mode0))
23632 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23633 op0 = copy_to_mode_reg (mode0, op0);
23634 if ((optimize && !register_operand (op1, mode1))
23635 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23636 op1 = copy_to_mode_reg (mode1, op1);
23638 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the flags-based comparison result into the low byte only.  */
23642 emit_insn (gen_rtx_SET (VOIDmode,
23643 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23644 gen_rtx_fmt_ee (comparison, QImode,
23648 return SUBREG_REG (target);
23651 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest-style builtin described by D: emit the test pattern on
   the two vector operands, then set the low byte of a zeroed SImode
   pseudo from the resulting flags comparison (d->comparison).  Mirrors
   ix86_expand_sse_comi.  NOTE(review): braces and the flags-register
   operand of the final comparison are elided in this excerpt.  */
23654 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23658 tree arg0 = CALL_EXPR_ARG (exp, 0);
23659 tree arg1 = CALL_EXPR_ARG (exp, 1);
23660 rtx op0 = expand_normal (arg0);
23661 rtx op1 = expand_normal (arg1);
23662 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23663 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23664 enum rtx_code comparison = d->comparison;
23666 if (VECTOR_MODE_P (mode0))
23667 op0 = safe_vector_operand (op0, mode0);
23668 if (VECTOR_MODE_P (mode1))
23669 op1 = safe_vector_operand (op1, mode1);
/* Zero the full SImode result, then address its low byte via a subreg.  */
23671 target = gen_reg_rtx (SImode);
23672 emit_move_insn (target, const0_rtx);
23673 target = gen_rtx_SUBREG (QImode, target, 0);
23675 if ((optimize && !register_operand (op0, mode0))
23676 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23677 op0 = copy_to_mode_reg (mode0, op0);
23678 if ((optimize && !register_operand (op1, mode1))
23679 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23680 op1 = copy_to_mode_reg (mode1, op1);
23682 pat = GEN_FCN (d->icode) (op0, op1);
23686 emit_insn (gen_rtx_SET (VOIDmode,
23687 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23688 gen_rtx_fmt_ee (comparison, QImode,
23692 return SUBREG_REG (target);
23695 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand a 5-argument pcmpestri/pcmpestrm builtin described by D.
   Operands: two 16-byte strings (op0, op2), their explicit lengths
   (op1, op3) and an 8-bit immediate mode byte (op4).  The pattern has two
   outputs; depending on d->code either the index (PCMPESTRI128) or the
   mask (PCMPESTRM128) becomes the visible result, with the other going to
   a scratch.  For the flag-reading variants (neither code matches), both
   outputs are scratches and the result is a 0/1 read from the flags
   register selected by d->flag.  NOTE(review): braces, returns and some
   trailing arguments are elided in this excerpt.  */
23698 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23699 tree exp, rtx target)
23702 tree arg0 = CALL_EXPR_ARG (exp, 0);
23703 tree arg1 = CALL_EXPR_ARG (exp, 1);
23704 tree arg2 = CALL_EXPR_ARG (exp, 2);
23705 tree arg3 = CALL_EXPR_ARG (exp, 3);
23706 tree arg4 = CALL_EXPR_ARG (exp, 4);
23707 rtx scratch0, scratch1;
23708 rtx op0 = expand_normal (arg0);
23709 rtx op1 = expand_normal (arg1);
23710 rtx op2 = expand_normal (arg2);
23711 rtx op3 = expand_normal (arg3);
23712 rtx op4 = expand_normal (arg4);
23713 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand modes: 0/1 are the two outputs, 2/4 the vector inputs,
   3/5 the integer lengths, 6 the immediate.  */
23715 tmode0 = insn_data[d->icode].operand[0].mode;
23716 tmode1 = insn_data[d->icode].operand[1].mode;
23717 modev2 = insn_data[d->icode].operand[2].mode;
23718 modei3 = insn_data[d->icode].operand[3].mode;
23719 modev4 = insn_data[d->icode].operand[4].mode;
23720 modei5 = insn_data[d->icode].operand[5].mode;
23721 modeimm = insn_data[d->icode].operand[6].mode;
23723 if (VECTOR_MODE_P (modev2))
23724 op0 = safe_vector_operand (op0, modev2);
23725 if (VECTOR_MODE_P (modev4))
23726 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each input against its operand predicate.  */
23728 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23729 op0 = copy_to_mode_reg (modev2, op0);
23730 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23731 op1 = copy_to_mode_reg (modei3, op1);
23732 if ((optimize && !register_operand (op2, modev4))
23733 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23734 op2 = copy_to_mode_reg (modev4, op2);
23735 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23736 op3 = copy_to_mode_reg (modei5, op3);
/* The control byte must be a compile-time 8-bit immediate.  */
23738 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23740 error ("the fifth argument must be a 8-bit immediate")
23744 if (d->code == IX86_BUILTIN_PCMPESTRI128)
23746 if (optimize || !target
23747 || GET_MODE (target) != tmode0
23748 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23749 target = gen_reg_rtx (tmode0);
23751 scratch1 = gen_reg_rtx (tmode1);
23753 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
23755 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23757 if (optimize || !target
23758 || GET_MODE (target) != tmode1
23759 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23760 target = gen_reg_rtx (tmode1);
23762 scratch0 = gen_reg_rtx (tmode0);
23764 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-reading variant: d->flag selects which condition-code register
   mode to test.  */
23768 gcc_assert (d->flag);
23770 scratch0 = gen_reg_rtx (tmode0);
23771 scratch1 = gen_reg_rtx (tmode1);
23773 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Materialize the flag as 0/1 in the low byte of a zeroed SImode
   pseudo, as in ix86_expand_sse_comi.  */
23783 target = gen_reg_rtx (SImode);
23784 emit_move_insn (target, const0_rtx);
23785 target = gen_rtx_SUBREG (QImode, target, 0);
23788 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23789 gen_rtx_fmt_ee (EQ, QImode,
23790 gen_rtx_REG ((enum machine_mode) d->flag,
23793 return SUBREG_REG (target);
23800 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
23803 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23804 tree exp, rtx target)
/* The pcmpistr (implicit-length) builtins take three arguments: two
   vector operands and an 8-bit immediate control byte.  */
23807 tree arg0 = CALL_EXPR_ARG (exp, 0);
23808 tree arg1 = CALL_EXPR_ARG (exp, 1);
23809 tree arg2 = CALL_EXPR_ARG (exp, 2);
23810 rtx scratch0, scratch1;
23811 rtx op0 = expand_normal (arg0);
23812 rtx op1 = expand_normal (arg1);
23813 rtx op2 = expand_normal (arg2);
23814 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand modes from the insn pattern: 0/1 are the two outputs,
   2-4 the inputs.  */
23816 tmode0 = insn_data[d->icode].operand[0].mode;
23817 tmode1 = insn_data[d->icode].operand[1].mode;
23818 modev2 = insn_data[d->icode].operand[2].mode;
23819 modev3 = insn_data[d->icode].operand[3].mode;
23820 modeimm = insn_data[d->icode].operand[4].mode;
/* Make vector operands safe (they may still be in memory).  */
23822 if (VECTOR_MODE_P (modev2))
23823 op0 = safe_vector_operand (op0, modev2);
23824 if (VECTOR_MODE_P (modev3))
23825 op1 = safe_vector_operand (op1, modev3);
/* Force inputs into forms accepted by the insn predicates.  */
23827 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23828 op0 = copy_to_mode_reg (modev2, op0);
23829 if ((optimize && !register_operand (op1, modev3))
23830 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23831 op1 = copy_to_mode_reg (modev3, op1);
/* The control byte must be an immediate.
   NOTE(review): message grammar — should read "an 8-bit immediate".  */
23833 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23835 error ("the third argument must be a 8-bit immediate");
/* Select which insn output this builtin returns; the unused output goes
   into a scratch register.  */
23839 if (d->code == IX86_BUILTIN_PCMPISTRI128)
23841 if (optimize || !target
23842 || GET_MODE (target) != tmode0
23843 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23844 target = gen_reg_rtx (tmode0);
23846 scratch1 = gen_reg_rtx (tmode1);
23848 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
23850 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
23852 if (optimize || !target
23853 || GET_MODE (target) != tmode1
23854 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23855 target = gen_reg_rtx (tmode1);
23857 scratch0 = gen_reg_rtx (tmode0);
23859 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-testing variants: both value outputs are scratch; the result is a
   test of a flags register (d->flag must name one — asserted below).  */
23863 gcc_assert (d->flag);
23865 scratch0 = gen_reg_rtx (tmode0);
23866 scratch1 = gen_reg_rtx (tmode1);
23868 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Materialize the flag as 0/1 in the low byte of a fresh SImode reg, then
   return the full SImode pseudo behind the QImode subreg.  */
23878 target = gen_reg_rtx (SImode);
23879 emit_move_insn (target, const0_rtx);
23880 target = gen_rtx_SUBREG (QImode, target, 0);
23883 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23884 gen_rtx_fmt_ee (EQ, QImode,
23885 gen_rtx_REG ((enum machine_mode) d->flag,
23888 return SUBREG_REG (target);
23894 /* Subroutine of ix86_expand_builtin to take care of insns with
23895 variable number of operands.  The number and kinds of arguments are
      derived from the builtin's function-type tag (d->flag).  */
23898 ix86_expand_args_builtin (const struct builtin_description *d,
23899 tree exp, rtx target)
23901 rtx pat, real_target;
23902 unsigned int i, nargs;
23903 unsigned int nargs_constant = 0;
23904 int num_memory = 0;
23908 enum machine_mode mode;
23910 bool last_arg_count = false;
23911 enum insn_code icode = d->icode;
23912 const struct insn_data *insn_p = &insn_data[icode];
23913 enum machine_mode tmode = insn_p->operand[0].mode;
23914 enum machine_mode rmode = VOIDmode;
23916 enum rtx_code comparison = d->comparison;
/* Classify the builtin by its function-type tag: each case group sets
   nargs and friends, or dispatches to a specialized expander.  */
23918 switch ((enum ix86_builtin_type) d->flag)
/* PTEST-style builtins: handled entirely by the ptest expander.  */
23920 case INT_FTYPE_V8SF_V8SF_PTEST:
23921 case INT_FTYPE_V4DI_V4DI_PTEST:
23922 case INT_FTYPE_V4DF_V4DF_PTEST:
23923 case INT_FTYPE_V4SF_V4SF_PTEST:
23924 case INT_FTYPE_V2DI_V2DI_PTEST:
23925 case INT_FTYPE_V2DF_V2DF_PTEST:
23926 return ix86_expand_sse_ptest (d, exp, target);
/* One-argument builtins.  */
23927 case FLOAT128_FTYPE_FLOAT128:
23928 case FLOAT_FTYPE_FLOAT:
23929 case INT64_FTYPE_V4SF:
23930 case INT64_FTYPE_V2DF:
23931 case INT_FTYPE_V16QI:
23932 case INT_FTYPE_V8QI:
23933 case INT_FTYPE_V8SF:
23934 case INT_FTYPE_V4DF:
23935 case INT_FTYPE_V4SF:
23936 case INT_FTYPE_V2DF:
23937 case V16QI_FTYPE_V16QI:
23938 case V8SI_FTYPE_V8SF:
23939 case V8SI_FTYPE_V4SI:
23940 case V8HI_FTYPE_V8HI:
23941 case V8HI_FTYPE_V16QI:
23942 case V8QI_FTYPE_V8QI:
23943 case V8SF_FTYPE_V8SF:
23944 case V8SF_FTYPE_V8SI:
23945 case V8SF_FTYPE_V4SF:
23946 case V4SI_FTYPE_V4SI:
23947 case V4SI_FTYPE_V16QI:
23948 case V4SI_FTYPE_V4SF:
23949 case V4SI_FTYPE_V8SI:
23950 case V4SI_FTYPE_V8HI:
23951 case V4SI_FTYPE_V4DF:
23952 case V4SI_FTYPE_V2DF:
23953 case V4HI_FTYPE_V4HI:
23954 case V4DF_FTYPE_V4DF:
23955 case V4DF_FTYPE_V4SI:
23956 case V4DF_FTYPE_V4SF:
23957 case V4DF_FTYPE_V2DF:
23958 case V4SF_FTYPE_V4SF:
23959 case V4SF_FTYPE_V4SI:
23960 case V4SF_FTYPE_V8SF:
23961 case V4SF_FTYPE_V4DF:
23962 case V4SF_FTYPE_V2DF:
23963 case V2DI_FTYPE_V2DI:
23964 case V2DI_FTYPE_V16QI:
23965 case V2DI_FTYPE_V8HI:
23966 case V2DI_FTYPE_V4SI:
23967 case V2DF_FTYPE_V2DF:
23968 case V2DF_FTYPE_V4SI:
23969 case V2DF_FTYPE_V4DF:
23970 case V2DF_FTYPE_V4SF:
23971 case V2DF_FTYPE_V2SI:
23972 case V2SI_FTYPE_V2SI:
23973 case V2SI_FTYPE_V4SF:
23974 case V2SI_FTYPE_V2SF:
23975 case V2SI_FTYPE_V2DF:
23976 case V2SF_FTYPE_V2SF:
23977 case V2SF_FTYPE_V2SI:
/* Unary ops that merge with the destination go through a dedicated
   expander.  */
23980 case V4SF_FTYPE_V4SF_VEC_MERGE:
23981 case V2DF_FTYPE_V2DF_VEC_MERGE:
23982 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-argument builtins; plain binops when no comparison is attached.  */
23983 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23984 case V16QI_FTYPE_V16QI_V16QI:
23985 case V16QI_FTYPE_V8HI_V8HI:
23986 case V8QI_FTYPE_V8QI_V8QI:
23987 case V8QI_FTYPE_V4HI_V4HI:
23988 case V8HI_FTYPE_V8HI_V8HI:
23989 case V8HI_FTYPE_V16QI_V16QI:
23990 case V8HI_FTYPE_V4SI_V4SI:
23991 case V8SF_FTYPE_V8SF_V8SF:
23992 case V8SF_FTYPE_V8SF_V8SI:
23993 case V4SI_FTYPE_V4SI_V4SI:
23994 case V4SI_FTYPE_V8HI_V8HI:
23995 case V4SI_FTYPE_V4SF_V4SF:
23996 case V4SI_FTYPE_V2DF_V2DF:
23997 case V4HI_FTYPE_V4HI_V4HI:
23998 case V4HI_FTYPE_V8QI_V8QI:
23999 case V4HI_FTYPE_V2SI_V2SI:
24000 case V4DF_FTYPE_V4DF_V4DF:
24001 case V4DF_FTYPE_V4DF_V4DI:
24002 case V4SF_FTYPE_V4SF_V4SF:
24003 case V4SF_FTYPE_V4SF_V4SI:
24004 case V4SF_FTYPE_V4SF_V2SI:
24005 case V4SF_FTYPE_V4SF_V2DF:
24006 case V4SF_FTYPE_V4SF_DI:
24007 case V4SF_FTYPE_V4SF_SI:
24008 case V2DI_FTYPE_V2DI_V2DI:
24009 case V2DI_FTYPE_V16QI_V16QI:
24010 case V2DI_FTYPE_V4SI_V4SI:
24011 case V2DI_FTYPE_V2DI_V16QI:
24012 case V2DI_FTYPE_V2DF_V2DF:
24013 case V2SI_FTYPE_V2SI_V2SI:
24014 case V2SI_FTYPE_V4HI_V4HI:
24015 case V2SI_FTYPE_V2SF_V2SF:
24016 case V2DF_FTYPE_V2DF_V2DF:
24017 case V2DF_FTYPE_V2DF_V4SF:
24018 case V2DF_FTYPE_V2DF_V2DI:
24019 case V2DF_FTYPE_V2DF_DI:
24020 case V2DF_FTYPE_V2DF_SI:
24021 case V2SF_FTYPE_V2SF_V2SF:
24022 case V1DI_FTYPE_V1DI_V1DI:
24023 case V1DI_FTYPE_V8QI_V8QI:
24024 case V1DI_FTYPE_V2SI_V2SI:
24025 if (comparison == UNKNOWN)
24026 return ix86_expand_binop_builtin (icode, exp, target);
/* Compare variants with swapped operand order; comparison must be set.  */
24029 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24030 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24031 gcc_assert (comparison != UNKNOWN);
/* SIMD shifts: the last argument is a count (register or immediate).  */
24035 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24036 case V8HI_FTYPE_V8HI_SI_COUNT:
24037 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24038 case V4SI_FTYPE_V4SI_SI_COUNT:
24039 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24040 case V4HI_FTYPE_V4HI_SI_COUNT:
24041 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24042 case V2DI_FTYPE_V2DI_SI_COUNT:
24043 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24044 case V2SI_FTYPE_V2SI_SI_COUNT:
24045 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24046 case V1DI_FTYPE_V1DI_SI_COUNT:
24048 last_arg_count = true;
24050 case UINT64_FTYPE_UINT64_UINT64:
24051 case UINT_FTYPE_UINT_UINT:
24052 case UINT_FTYPE_UINT_USHORT:
24053 case UINT_FTYPE_UINT_UCHAR:
/* Variants whose last argument(s) must be immediates (nargs_constant).  */
24056 case V2DI2TI_FTYPE_V2DI_INT:
24059 nargs_constant = 1;
24061 case V8HI_FTYPE_V8HI_INT:
24062 case V8SF_FTYPE_V8SF_INT:
24063 case V4SI_FTYPE_V4SI_INT:
24064 case V4SI_FTYPE_V8SI_INT:
24065 case V4HI_FTYPE_V4HI_INT:
24066 case V4DF_FTYPE_V4DF_INT:
24067 case V4SF_FTYPE_V4SF_INT:
24068 case V4SF_FTYPE_V8SF_INT:
24069 case V2DI_FTYPE_V2DI_INT:
24070 case V2DF_FTYPE_V2DF_INT:
24071 case V2DF_FTYPE_V4DF_INT:
24073 nargs_constant = 1;
/* Three-argument builtins.  */
24075 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24076 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24077 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24078 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24079 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24082 case V16QI_FTYPE_V16QI_V16QI_INT:
24083 case V8HI_FTYPE_V8HI_V8HI_INT:
24084 case V8SI_FTYPE_V8SI_V8SI_INT:
24085 case V8SI_FTYPE_V8SI_V4SI_INT:
24086 case V8SF_FTYPE_V8SF_V8SF_INT:
24087 case V8SF_FTYPE_V8SF_V4SF_INT:
24088 case V4SI_FTYPE_V4SI_V4SI_INT:
24089 case V4DF_FTYPE_V4DF_V4DF_INT:
24090 case V4DF_FTYPE_V4DF_V2DF_INT:
24091 case V4SF_FTYPE_V4SF_V4SF_INT:
24092 case V2DI_FTYPE_V2DI_V2DI_INT:
24093 case V2DF_FTYPE_V2DF_V2DF_INT:
24095 nargs_constant = 1;
24097 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24100 nargs_constant = 1;
24102 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24105 nargs_constant = 1;
24107 case V2DI_FTYPE_V2DI_UINT_UINT:
24109 nargs_constant = 2;
24111 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24113 nargs_constant = 2;
24116 gcc_unreachable ();
24119 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparison builtins are handled by the dedicated compare expander.  */
24121 if (comparison != UNKNOWN)
24123 gcc_assert (nargs == 2);
24124 return ix86_expand_sse_compare (d, exp, target, swap);
/* Set up the destination.  When the insn's result mode (tmode) differs
   from the builtin's return mode (rmode), allocate in rmode and operate
   on a paradoxical subreg (real_target).  */
24127 if (rmode == VOIDmode || rmode == tmode)
24131 || GET_MODE (target) != tmode
24132 || ! (*insn_p->operand[0].predicate) (target, tmode))
24133 target = gen_reg_rtx (tmode);
24134 real_target = target;
24138 target = gen_reg_rtx (rmode);
24139 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and legitimize each argument.  */
24142 for (i = 0; i < nargs; i++)
24144 tree arg = CALL_EXPR_ARG (exp, i);
24145 rtx op = expand_normal (arg);
24146 enum machine_mode mode = insn_p->operand[i + 1].mode;
24147 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24149 if (last_arg_count && (i + 1) == nargs)
24151 /* SIMD shift insns take either an 8-bit immediate or
24152 register as count. But builtin functions take int as
24153 count. If count doesn't match, we put it in register. */
24156 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24157 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24158 op = copy_to_reg (op);
/* Trailing arguments that must be immediates: report the exact
   immediate width the pattern requires.  */
24161 else if ((nargs - i) <= nargs_constant)
24166 case CODE_FOR_sse4_1_roundpd:
24167 case CODE_FOR_sse4_1_roundps:
24168 case CODE_FOR_sse4_1_roundsd:
24169 case CODE_FOR_sse4_1_roundss:
24170 case CODE_FOR_sse4_1_blendps:
24171 case CODE_FOR_avx_blendpd256:
24172 case CODE_FOR_avx_vpermilv4df:
24173 case CODE_FOR_avx_roundpd256:
24174 case CODE_FOR_avx_roundps256:
24175 error ("the last argument must be a 4-bit immediate");
24178 case CODE_FOR_sse4_1_blendpd:
24179 case CODE_FOR_avx_vpermilv2df:
24180 error ("the last argument must be a 2-bit immediate");
24183 case CODE_FOR_avx_vextractf128v4df:
24184 case CODE_FOR_avx_vextractf128v8sf:
24185 case CODE_FOR_avx_vextractf128v8si:
24186 case CODE_FOR_avx_vinsertf128v4df:
24187 case CODE_FOR_avx_vinsertf128v8sf:
24188 case CODE_FOR_avx_vinsertf128v8si:
24189 error ("the last argument must be a 1-bit immediate");
24192 case CODE_FOR_avx_cmpsdv2df3:
24193 case CODE_FOR_avx_cmpssv4sf3:
24194 case CODE_FOR_avx_cmppdv2df3:
24195 case CODE_FOR_avx_cmppsv4sf3:
24196 case CODE_FOR_avx_cmppdv4df3:
24197 case CODE_FOR_avx_cmppsv8sf3:
24198 error ("the last argument must be a 5-bit immediate");
/* Default: generic 8-bit immediate diagnostics, distinguishing the
   last from the next-to-last argument.  */
24202 switch (nargs_constant)
24205 if ((nargs - i) == nargs_constant)
24207 error ("the next to last argument must be an 8-bit immediate");
24211 error ("the last argument must be an 8-bit immediate");
24214 gcc_unreachable ();
24221 if (VECTOR_MODE_P (mode))
24222 op = safe_vector_operand (op, mode);
24224 /* If we aren't optimizing, only allow one memory operand to
24226 if (memory_operand (op, mode))
24229 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24231 if (optimize || !match || num_memory > 1)
24232 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: reload into a register and view it in the insn's
   required mode via a subreg.  */
24236 op = copy_to_reg (op);
24237 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24242 args[i].mode = mode;
/* Emit the insn with the right operand count.  */
24248 pat = GEN_FCN (icode) (real_target, args[0].op);
24251 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24254 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24258 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24259 args[2].op, args[3].op);
24262 gcc_unreachable ();
24272 /* Subroutine of ix86_expand_builtin to take care of insns with
24273 variable number of operands.  These are "special" builtins whose
      operands involve memory: loads, stores, and masked load/store forms.  */
24276 ix86_expand_special_args_builtin (const struct builtin_description *d,
24277 tree exp, rtx target)
24281 unsigned int i, nargs, arg_adjust, memory;
24285 enum machine_mode mode;
24287 enum insn_code icode = d->icode;
24288 bool last_arg_constant = false;
24289 const struct insn_data *insn_p = &insn_data[icode];
24290 enum machine_mode tmode = insn_p->operand[0].mode;
/* klass selects load vs store handling for the target operand below.  */
24291 enum { load, store } klass;
/* Classify by the special function-type tag; each group establishes
   nargs, klass, and which operand index (memory) is the memory one.  */
24293 switch ((enum ix86_special_builtin_type) d->flag)
24295 case VOID_FTYPE_VOID:
24296 emit_insn (GEN_FCN (icode) (target));
/* Loads: pointer argument, vector result.  */
24298 case V2DI_FTYPE_PV2DI:
24299 case V32QI_FTYPE_PCCHAR:
24300 case V16QI_FTYPE_PCCHAR:
24301 case V8SF_FTYPE_PCV4SF:
24302 case V8SF_FTYPE_PCFLOAT:
24303 case V4SF_FTYPE_PCFLOAT:
24304 case V4DF_FTYPE_PCV2DF:
24305 case V4DF_FTYPE_PCDOUBLE:
24306 case V2DF_FTYPE_PCDOUBLE:
/* Stores: pointer destination, value argument(s).  */
24311 case VOID_FTYPE_PV2SF_V4SF:
24312 case VOID_FTYPE_PV4DI_V4DI:
24313 case VOID_FTYPE_PV2DI_V2DI:
24314 case VOID_FTYPE_PCHAR_V32QI:
24315 case VOID_FTYPE_PCHAR_V16QI:
24316 case VOID_FTYPE_PFLOAT_V8SF:
24317 case VOID_FTYPE_PFLOAT_V4SF:
24318 case VOID_FTYPE_PDOUBLE_V4DF:
24319 case VOID_FTYPE_PDOUBLE_V2DF:
24320 case VOID_FTYPE_PDI_DI:
24321 case VOID_FTYPE_PINT_INT:
24324 /* Reserve memory operand for target. */
24325 memory = ARRAY_SIZE (args);
/* Loads combining a register operand with a memory operand.  */
24327 case V4SF_FTYPE_V4SF_PCV2SF:
24328 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked loads (AVX maskload-style): memory first, mask second.  */
24333 case V8SF_FTYPE_PCV8SF_V8SF:
24334 case V4DF_FTYPE_PCV4DF_V4DF:
24335 case V4SF_FTYPE_PCV4SF_V4SF:
24336 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked stores: memory target plus mask and value operands.  */
24341 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24342 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24343 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24344 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24347 /* Reserve memory operand for target. */
24348 memory = ARRAY_SIZE (args);
24351 gcc_unreachable ();
24354 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, argument 0 is the destination address: build the MEM
   target from it.  TARGET must not have been supplied by the caller.  */
24356 if (klass == store)
24358 arg = CALL_EXPR_ARG (exp, 0);
24359 op = expand_normal (arg);
24360 gcc_assert (target == 0);
24361 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
/* For loads, reuse TARGET if it is usable, else grab a fresh reg.  */
24369 || GET_MODE (target) != tmode
24370 || ! (*insn_p->operand[0].predicate) (target, tmode))
24371 target = gen_reg_rtx (tmode);
/* Expand and legitimize the remaining arguments; arg_adjust skips the
   already-consumed store destination.  */
24374 for (i = 0; i < nargs; i++)
24376 enum machine_mode mode = insn_p->operand[i + 1].mode;
24379 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24380 op = expand_normal (arg);
24381 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24383 if (last_arg_constant && (i + 1) == nargs)
24389 error ("the last argument must be an 8-bit immediate");
24397 /* This must be the memory operand. */
24398 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24399 gcc_assert (GET_MODE (op) == mode
24400 || GET_MODE (op) == VOIDmode);
24404 /* This must be register. */
24405 if (VECTOR_MODE_P (mode))
24406 op = safe_vector_operand (op, mode);
24408 gcc_assert (GET_MODE (op) == mode
24409 || GET_MODE (op) == VOIDmode);
24410 op = copy_to_mode_reg (mode, op);
24415 args[i].mode = mode;
/* Emit the insn with the right operand count.  */
24421 pat = GEN_FCN (icode) (target, args[0].op);
24424 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24427 gcc_unreachable ();
/* Stores have no value result.  */
24433 return klass == store ? 0 : target;
24436 /* Return the integer constant in ARG. Constrain it to be in the range
24437 of the subparts of VEC_TYPE; issue an error if not. */
24440 get_element_number (tree vec_type, tree arg)
24442 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* ARG must be a host-representable unsigned integer constant no greater
   than the last valid element index; otherwise report a user error.  */
24444 if (!host_integerp (arg, 1)
24445 || (elt = tree_low_cst (arg, 1), elt > max))
24447 error ("selector must be an integer constant in the range 0..%wi", max);
24454 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24455 ix86_expand_vector_init. We DO have language-level syntax for this, in
24456 the form of (type){ init-list }. Except that since we can't place emms
24457 instructions from inside the compiler, we can't allow the use of MMX
24458 registers unless the user explicitly asks for it. So we do *not* define
24459 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24460 we have builtins invoked by mmintrin.h that gives us license to emit
24461 these sorts of instructions. */
24464 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24466 enum machine_mode tmode = TYPE_MODE (type);
24467 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24468 int i, n_elt = GET_MODE_NUNITS (tmode);
24469 rtvec v = rtvec_alloc (n_elt);
/* The call must supply exactly one scalar per vector element.  */
24471 gcc_assert (VECTOR_MODE_P (tmode));
24472 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and narrow it to the vector's inner mode.  */
24474 for (i = 0; i < n_elt; ++i)
24476 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24477 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24480 if (!target || !register_operand (target, tmode))
24481 target = gen_reg_rtx (tmode);
24483 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24487 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24488 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24489 had a language-level syntax for referencing vector elements. */
24492 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24494 enum machine_mode tmode, mode0;
/* arg0 is the source vector, arg1 the constant element selector.  */
24499 arg0 = CALL_EXPR_ARG (exp, 0);
24500 arg1 = CALL_EXPR_ARG (exp, 1);
24502 op0 = expand_normal (arg0);
/* Validates the selector range and reports an error if out of bounds.  */
24503 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode is the element (scalar) mode, mode0 the whole-vector mode.  */
24505 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24506 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24507 gcc_assert (VECTOR_MODE_P (mode0));
24509 op0 = force_reg (mode0, op0);
24511 if (optimize || !target || !register_operand (target, tmode))
24512 target = gen_reg_rtx (tmode);
24514 ix86_expand_vector_extract (true, target, op0, elt);
24519 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24520 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24521 a language-level syntax for referencing vector elements. */
24524 ix86_expand_vec_set_builtin (tree exp)
24526 enum machine_mode tmode, mode1;
24527 tree arg0, arg1, arg2;
24529 rtx op0, op1, target;
/* arg0 = source vector, arg1 = new element value, arg2 = element index.  */
24531 arg0 = CALL_EXPR_ARG (exp, 0);
24532 arg1 = CALL_EXPR_ARG (exp, 1);
24533 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode is the vector mode, mode1 the element mode.  */
24535 tmode = TYPE_MODE (TREE_TYPE (arg0));
24536 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24537 gcc_assert (VECTOR_MODE_P (tmode));
24539 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24540 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Validates the selector range and reports an error if out of bounds.  */
24541 elt = get_element_number (TREE_TYPE (arg0), arg2);
24543 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24544 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24546 op0 = force_reg (tmode, op0);
24547 op1 = force_reg (mode1, op1);
24549 /* OP0 is the source of these builtin functions and shouldn't be
24550 modified. Create a copy, use it and return it as target. */
24551 target = gen_reg_rtx (tmode);
24552 emit_move_insn (target, op0);
24553 ix86_expand_vector_set (true, target, op1, elt);
24558 /* Expand an expression EXP that calls a built-in function,
24559 with result going to TARGET if that's convenient
24560 (and in mode MODE if that's convenient).
24561 SUBTARGET may be used as the target for computing one of EXP's operands.
24562 IGNORE is nonzero if the value is to be ignored. */
24565 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24566 enum machine_mode mode ATTRIBUTE_UNUSED,
24567 int ignore ATTRIBUTE_UNUSED)
24569 const struct builtin_description *d;
24571 enum insn_code icode;
24572 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24573 tree arg0, arg1, arg2;
24574 rtx op0, op1, op2, pat;
24575 enum machine_mode mode0, mode1, mode2;
24576 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24578 /* Determine whether the builtin function is available under the current ISA.
24579 Originally the builtin was not created if it wasn't applicable to the
24580 current ISA based on the command line switches. With function specific
24581 options, we need to check in the context of the function making the call
24582 whether it is supported. */
24583 if (ix86_builtins_isa[fcode].isa
24584 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24586 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24587 NULL, NULL, false);
24590 error ("%qE needs unknown isa option", fndecl);
24593 gcc_assert (opts != NULL);
24594 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins needing hand-written expansion are handled case by case;
   everything else falls through to the table-driven loops below.  */
24602 case IX86_BUILTIN_MASKMOVQ:
24603 case IX86_BUILTIN_MASKMOVDQU:
24604 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24605 ? CODE_FOR_mmx_maskmovq
24606 : CODE_FOR_sse2_maskmovdqu);
24607 /* Note the arg order is different from the operand order. */
24608 arg1 = CALL_EXPR_ARG (exp, 0);
24609 arg2 = CALL_EXPR_ARG (exp, 1);
24610 arg0 = CALL_EXPR_ARG (exp, 2);
24611 op0 = expand_normal (arg0);
24612 op1 = expand_normal (arg1);
24613 op2 = expand_normal (arg2);
24614 mode0 = insn_data[icode].operand[0].mode;
24615 mode1 = insn_data[icode].operand[1].mode;
24616 mode2 = insn_data[icode].operand[2].mode;
/* The destination pointer becomes a MEM in the data operand's mode.  */
24618 op0 = force_reg (Pmode, op0);
24619 op0 = gen_rtx_MEM (mode1, op0);
24621 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24622 op0 = copy_to_mode_reg (mode0, op0);
24623 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24624 op1 = copy_to_mode_reg (mode1, op1);
24625 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24626 op2 = copy_to_mode_reg (mode2, op2);
24627 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a stack slot so the MXCSR value has a
   memory operand as the hardware insn requires.  */
24633 case IX86_BUILTIN_LDMXCSR:
24634 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24635 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24636 emit_move_insn (target, op0);
24637 emit_insn (gen_sse_ldmxcsr (target));
24640 case IX86_BUILTIN_STMXCSR:
24641 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24642 emit_insn (gen_sse_stmxcsr (target));
24643 return copy_to_mode_reg (SImode, target);
24645 case IX86_BUILTIN_CLFLUSH:
24646 arg0 = CALL_EXPR_ARG (exp, 0);
24647 op0 = expand_normal (arg0);
24648 icode = CODE_FOR_sse2_clflush;
24649 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24650 op0 = copy_to_mode_reg (Pmode, op0);
24652 emit_insn (gen_sse2_clflush (op0));
24655 case IX86_BUILTIN_MONITOR:
24656 arg0 = CALL_EXPR_ARG (exp, 0);
24657 arg1 = CALL_EXPR_ARG (exp, 1);
24658 arg2 = CALL_EXPR_ARG (exp, 2);
24659 op0 = expand_normal (arg0);
24660 op1 = expand_normal (arg1);
24661 op2 = expand_normal (arg2);
24663 op0 = copy_to_mode_reg (Pmode, op0);
24665 op1 = copy_to_mode_reg (SImode, op1);
24667 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor selects the 32-/64-bit monitor pattern.  */
24668 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24671 case IX86_BUILTIN_MWAIT:
24672 arg0 = CALL_EXPR_ARG (exp, 0);
24673 arg1 = CALL_EXPR_ARG (exp, 1);
24674 op0 = expand_normal (arg0);
24675 op1 = expand_normal (arg1);
24677 op0 = copy_to_mode_reg (SImode, op0);
24679 op1 = copy_to_mode_reg (SImode, op1);
24680 emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set wrappers — see the dedicated expanders.  */
24683 case IX86_BUILTIN_VEC_INIT_V2SI:
24684 case IX86_BUILTIN_VEC_INIT_V4HI:
24685 case IX86_BUILTIN_VEC_INIT_V8QI:
24686 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24688 case IX86_BUILTIN_VEC_EXT_V2DF:
24689 case IX86_BUILTIN_VEC_EXT_V2DI:
24690 case IX86_BUILTIN_VEC_EXT_V4SF:
24691 case IX86_BUILTIN_VEC_EXT_V4SI:
24692 case IX86_BUILTIN_VEC_EXT_V8HI:
24693 case IX86_BUILTIN_VEC_EXT_V2SI:
24694 case IX86_BUILTIN_VEC_EXT_V4HI:
24695 case IX86_BUILTIN_VEC_EXT_V16QI:
24696 return ix86_expand_vec_ext_builtin (exp, target);
24698 case IX86_BUILTIN_VEC_SET_V2DI:
24699 case IX86_BUILTIN_VEC_SET_V4SF:
24700 case IX86_BUILTIN_VEC_SET_V4SI:
24701 case IX86_BUILTIN_VEC_SET_V8HI:
24702 case IX86_BUILTIN_VEC_SET_V4HI:
24703 case IX86_BUILTIN_VEC_SET_V16QI:
24704 return ix86_expand_vec_set_builtin (exp);
/* __builtin_infq: load a quad-precision infinity from the constant pool.  */
24706 case IX86_BUILTIN_INFQ:
24708 REAL_VALUE_TYPE inf;
24712 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24714 tmp = validize_mem (force_const_mem (mode, tmp));
24717 target = gen_reg_rtx (mode);
24719 emit_move_insn (target, tmp);
/* Table-driven dispatch: scan each builtin-description table for FCODE
   and hand off to the matching generic expander.  */
24727 for (i = 0, d = bdesc_special_args;
24728 i < ARRAY_SIZE (bdesc_special_args);
24730 if (d->code == fcode)
24731 return ix86_expand_special_args_builtin (d, exp, target);
24733 for (i = 0, d = bdesc_args;
24734 i < ARRAY_SIZE (bdesc_args);
24736 if (d->code == fcode)
24739 case IX86_BUILTIN_FABSQ:
24740 case IX86_BUILTIN_COPYSIGNQ:
24742 /* Emit a normal call if SSE2 isn't available. */
24743 return expand_call (exp, target, ignore);
24745 return ix86_expand_args_builtin (d, exp, target);
24748 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24749 if (d->code == fcode)
24750 return ix86_expand_sse_comi (d, exp, target);
24752 for (i = 0, d = bdesc_pcmpestr;
24753 i < ARRAY_SIZE (bdesc_pcmpestr);
24755 if (d->code == fcode)
24756 return ix86_expand_sse_pcmpestr (d, exp, target);
24758 for (i = 0, d = bdesc_pcmpistr;
24759 i < ARRAY_SIZE (bdesc_pcmpistr);
24761 if (d->code == fcode)
24762 return ix86_expand_sse_pcmpistr (d, exp, target);
24764 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24765 if (d->code == fcode)
24766 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24767 (enum multi_arg_type)d->flag,
/* Every valid FCODE must have been handled above.  */
24770 gcc_unreachable ();
24773 /* Returns a function decl for a vectorized version of the builtin function
24774 with builtin function code FN and the result vector type TYPE, or NULL_TREE
24775 if it is not available. */
24778 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24781 enum machine_mode in_mode, out_mode;
/* Both sides must be vector types for a vectorized variant to apply.  */
24784 if (TREE_CODE (type_out) != VECTOR_TYPE
24785 || TREE_CODE (type_in) != VECTOR_TYPE)
24788 out_mode = TYPE_MODE (TREE_TYPE (type_out));
24789 out_n = TYPE_VECTOR_SUBPARTS (type_out);
24790 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24791 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Match the scalar builtin against the element modes and widths the
   corresponding SSE builtin supports.  */
24795 case BUILT_IN_SQRT:
24796 if (out_mode == DFmode && out_n == 2
24797 && in_mode == DFmode && in_n == 2)
24798 return ix86_builtins[IX86_BUILTIN_SQRTPD];
24801 case BUILT_IN_SQRTF:
24802 if (out_mode == SFmode && out_n == 4
24803 && in_mode == SFmode && in_n == 4)
24804 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24807 case BUILT_IN_LRINT:
24808 if (out_mode == SImode && out_n == 4
24809 && in_mode == DFmode && in_n == 2)
24810 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24813 case BUILT_IN_LRINTF:
24814 if (out_mode == SImode && out_n == 4
24815 && in_mode == SFmode && in_n == 4)
24816 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24823 /* Dispatch to a handler for a vectorization library. */
24824 if (ix86_veclib_handler)
24825 return (*ix86_veclib_handler)(fn, type_out, type_in);
24830 /* Handler for an SVML-style interface to
24831 a library with vectorized intrinsics.  Returns a decl for the
      vectorized library routine matching FN, or NULL_TREE.  */
24834 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24837 tree fntype, new_fndecl, args;
24840 enum machine_mode el_mode, in_mode;
24843 /* The SVML is suitable for unsafe math only. */
24844 if (!flag_unsafe_math_optimizations)
24847 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24848 n = TYPE_VECTOR_SUBPARTS (type_out);
24849 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24850 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element modes and widths must agree.  */
24851 if (el_mode != in_mode
/* Double-precision math functions: SVML provides 2-wide DF variants.  */
24859 case BUILT_IN_LOG10:
24861 case BUILT_IN_TANH:
24863 case BUILT_IN_ATAN:
24864 case BUILT_IN_ATAN2:
24865 case BUILT_IN_ATANH:
24866 case BUILT_IN_CBRT:
24867 case BUILT_IN_SINH:
24869 case BUILT_IN_ASINH:
24870 case BUILT_IN_ASIN:
24871 case BUILT_IN_COSH:
24873 case BUILT_IN_ACOSH:
24874 case BUILT_IN_ACOS:
24875 if (el_mode != DFmode || n != 2)
/* Single-precision math functions: SVML provides 4-wide SF variants.  */
24879 case BUILT_IN_EXPF:
24880 case BUILT_IN_LOGF:
24881 case BUILT_IN_LOG10F:
24882 case BUILT_IN_POWF:
24883 case BUILT_IN_TANHF:
24884 case BUILT_IN_TANF:
24885 case BUILT_IN_ATANF:
24886 case BUILT_IN_ATAN2F:
24887 case BUILT_IN_ATANHF:
24888 case BUILT_IN_CBRTF:
24889 case BUILT_IN_SINHF:
24890 case BUILT_IN_SINF:
24891 case BUILT_IN_ASINHF:
24892 case BUILT_IN_ASINF:
24893 case BUILT_IN_COSHF:
24894 case BUILT_IN_COSF:
24895 case BUILT_IN_ACOSHF:
24896 case BUILT_IN_ACOSF:
24897 if (el_mode != SFmode || n != 4)
/* Construct the SVML routine name from the scalar builtin's name:
   "vmls<name>4" for float, "vmld<name>2" for double, with "log"
   spelled "Ln" as a special case.  */
24905 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24907 if (fn == BUILT_IN_LOGF)
24908 strcpy (name, "vmlsLn4");
24909 else if (fn == BUILT_IN_LOG)
24910 strcpy (name, "vmldLn2");
24913 sprintf (name, "vmls%s", bname+10);
24914 name[strlen (name)-1] = '4';
24917 sprintf (name, "vmld%s2", bname+10);
24919 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a one- or two-argument
   vector function type.  */
24923 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24924 args = TREE_CHAIN (args))
24928 fntype = build_function_type_list (type_out, type_in, NULL);
24930 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24932 /* Build a function declaration for the vectorized function. */
24933 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24934 TREE_PUBLIC (new_fndecl) = 1;
24935 DECL_EXTERNAL (new_fndecl) = 1;
24936 DECL_IS_NOVOPS (new_fndecl) = 1;
24937 TREE_READONLY (new_fndecl) = 1;
24942 /* Handler for an ACML-style interface to
24943 a library with vectorized intrinsics.  Returns a decl for the
      vectorized library routine matching FN, or NULL_TREE.  */
24946 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: the ".." is patched with the scalar routine's name
   below (e.g. "__vrd2_sin" style — derived from bname at 25004).  */
24948 char name[20] = "__vr.._";
24949 tree fntype, new_fndecl, args;
24952 enum machine_mode el_mode, in_mode;
24955 /* The ACML is 64bits only and suitable for unsafe math only as
24956 it does not correctly support parts of IEEE with the required
24957 precision such as denormals. */
24959 || !flag_unsafe_math_optimizations)
24962 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24963 n = TYPE_VECTOR_SUBPARTS (type_out);
24964 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24965 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element modes and widths must agree.  */
24966 if (el_mode != in_mode
/* Double-precision functions ACML provides.  */
24976 case BUILT_IN_LOG2:
24977 case BUILT_IN_LOG10:
24980 if (el_mode != DFmode
/* Single-precision functions ACML provides.  */
24985 case BUILT_IN_SINF:
24986 case BUILT_IN_COSF:
24987 case BUILT_IN_EXPF:
24988 case BUILT_IN_POWF:
24989 case BUILT_IN_LOGF:
24990 case BUILT_IN_LOG2F:
24991 case BUILT_IN_LOG10F:
24994 if (el_mode != SFmode
/* Splice the scalar builtin's name (past its "__builtin_" prefix)
   into the "__vr.._" template.  */
25003 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25004 sprintf (name + 7, "%s", bname+10);
/* Count the scalar builtin's arguments to pick a one- or two-argument
   vector function type.  */
25007 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25008 args = TREE_CHAIN (args))
25012 fntype = build_function_type_list (type_out, type_in, NULL);
25014 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25016 /* Build a function declaration for the vectorized function. */
25017 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25018 TREE_PUBLIC (new_fndecl) = 1;
25019 DECL_EXTERNAL (new_fndecl) = 1;
25020 DECL_IS_NOVOPS (new_fndecl) = 1;
25021 TREE_READONLY (new_fndecl) = 1;
25027 /* Returns a decl of a function that implements conversion of an integer vector
25028 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25029 side of the conversion.
25030 Return NULL_TREE if it is not available. */
/* NOTE(review): listing is gapped -- the FLOAT_EXPR case label, braces,
   default branches and final return are not visible here.  */
25033 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
/* Only vector types have a builtin conversion.  */
25035 if (TREE_CODE (type) != VECTOR_TYPE)
25041 switch (TYPE_MODE (type))
/* int -> float: cvtdq2ps.  */
25044 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25049 case FIX_TRUNC_EXPR:
25050 switch (TYPE_MODE (type))
/* float -> int with truncation: cvttps2dq.  */
25053 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25063 /* Returns a code for a target-specific builtin that implements
25064 reciprocal of the function, or NULL_TREE if not available. */
/* FN is the builtin to invert; MD_FN distinguishes machine-dependent
   builtins from normal ones.  Reciprocal substitution is only legal
   under fast/unsafe FP math with SSE math enabled, and is skipped when
   optimizing for size.  NOTE(review): listing is gapped; switch headers
   and fall-through returns are not visible.  */
25067 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25068 bool sqrt ATTRIBUTE_UNUSED)
/* rcp/rsqrt lose precision, so require the full unsafe-math set.  */
25070 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25071 && flag_finite_math_only && !flag_trapping_math
25072 && flag_unsafe_math_optimizations))
25076 /* Machine dependent builtins. */
25079 /* Vectorized version of sqrt to rsqrt conversion. */
25080 case IX86_BUILTIN_SQRTPS_NR:
25081 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25087 /* Normal builtins. */
25090 /* Sqrt to rsqrt conversion. */
25091 case BUILT_IN_SQRTF:
25092 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25099 /* Store OPERAND to the memory after reload is completed. This means
25100 that we can't easily use assign_stack_local. */
/* Returns a MEM rtx through which OPERAND can subsequently be read.
   Uses the red zone below the stack pointer when available, otherwise
   pushes onto the stack.  NOTE(review): listing is gapped; braces,
   some mode cases and the emit_insn wrappers are not fully visible.  */
25102 ix86_force_to_memory (enum machine_mode mode, rtx operand)
/* Only valid post-reload: we manipulate the stack pointer directly.  */
25106 gcc_assert (reload_completed);
/* Red zone available (SysV 64-bit): store below the stack pointer
   without adjusting it.  */
25107 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25109 result = gen_rtx_MEM (mode,
25110 gen_rtx_PLUS (Pmode,
25112 GEN_INT (-RED_ZONE_SIZE)));
25113 emit_move_insn (result, operand);
/* 64-bit without a usable red zone: push a DImode word.  */
25115 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
/* Narrow operands are widened to DImode before the push.  */
25121 operand = gen_lowpart (DImode, operand);
25125 gen_rtx_SET (VOIDmode,
25126 gen_rtx_MEM (DImode,
25127 gen_rtx_PRE_DEC (DImode,
25128 stack_pointer_rtx)),
25132 gcc_unreachable ();
25134 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: push the two SImode halves separately.  */
25143 split_di (&operand, 1, operands, operands + 1);
25145 gen_rtx_SET (VOIDmode,
25146 gen_rtx_MEM (SImode,
25147 gen_rtx_PRE_DEC (Pmode,
25148 stack_pointer_rtx)),
25151 gen_rtx_SET (VOIDmode,
25152 gen_rtx_MEM (SImode,
25153 gen_rtx_PRE_DEC (Pmode,
25154 stack_pointer_rtx)),
25159 /* Store HImodes as SImodes. */
25160 operand = gen_lowpart (SImode, operand);
25164 gen_rtx_SET (VOIDmode,
25165 gen_rtx_MEM (GET_MODE (operand),
25166 gen_rtx_PRE_DEC (SImode,
25167 stack_pointer_rtx)),
25171 gcc_unreachable ();
25173 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25178 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: deallocate the stack slot it created.
   A no-op when the red zone was used (no stack adjustment was made).
   NOTE(review): listing is gapped; size computation lines are not
   visible.  */
25180 ix86_free_from_memory (enum machine_mode mode)
/* Only pop if ix86_force_to_memory actually pushed.  */
25182 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25186 if (mode == DImode || TARGET_64BIT)
25190 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25191 to pop or add instruction if registers are available. */
25192 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25193 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25198 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25199 QImode must go into class Q_REGS.
25200 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25201 movdf to do mem-to-mem moves through integer regs. */
/* PREFERRED_RELOAD_CLASS target macro worker: given value X and the
   requested class REGCLASS, return the (sub)class reload should prefer,
   or NO_REGS to force X to memory.  NOTE(review): listing is gapped;
   several returns and braces are not visible.  */
25203 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25205 enum machine_mode mode = GET_MODE (x);
25207 /* We're only allowed to return a subclass of CLASS. Many of the
25208 following checks fail for NO_REGS, so eliminate that early. */
25209 if (regclass == NO_REGS)
25212 /* All classes can load zeros. */
25213 if (x == CONST0_RTX (mode))
25216 /* Force constants into memory if we are loading a (nonzero) constant into
25217 an MMX or SSE register. This is because there are no MMX/SSE instructions
25218 to load from a constant. */
25220 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25223 /* Prefer SSE regs only, if we can use them for math. */
25224 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25225 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25227 /* Floating-point constants need more complex checks. */
25228 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25230 /* General regs can load everything. */
25231 if (reg_class_subset_p (regclass, GENERAL_REGS))
25234 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25235 zero above. We only want to wind up preferring 80387 registers if
25236 we plan on doing computation with them. */
25238 && standard_80387_constant_p (x))
25240 /* Limit class to non-sse. */
25241 if (regclass == FLOAT_SSE_REGS)
25243 if (regclass == FP_TOP_SSE_REGS)
25245 if (regclass == FP_SECOND_SSE_REGS)
25246 return FP_SECOND_REG;
25247 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25254 /* Generally when we see PLUS here, it's the function invariant
25255 (plus soft-fp const_int). Which can only be computed into general
25257 if (GET_CODE (x) == PLUS)
25258 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25260 /* QImode constants are easy to load, but non-constant QImode data
25261 must go into Q_REGS. */
25262 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25264 if (reg_class_subset_p (regclass, Q_REGS))
25266 if (reg_class_subset_p (Q_REGS, regclass))
25274 /* Discourage putting floating-point values in SSE registers unless
25275 SSE math is being used, and likewise for the 387 registers. */
/* PREFERRED_OUTPUT_RELOAD_CLASS worker; same contract as
   ix86_preferred_reload_class but for output reloads.  */
25277 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25279 enum machine_mode mode = GET_MODE (x);
25281 /* Restrict the output reload class to the register bank that we are doing
25282 math on. If we would like not to return a subset of CLASS, reject this
25283 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): redundant re-read -- mode was already initialized from
   GET_MODE (x) above.  Harmless.  */
25284 mode = GET_MODE (x);
25285 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25286 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25288 if (X87_FLOAT_MODE_P (mode))
/* Strip the SSE half of mixed x87/SSE classes.  */
25290 if (regclass == FP_TOP_SSE_REGS)
25292 else if (regclass == FP_SECOND_SSE_REGS)
25293 return FP_SECOND_REG;
25295 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* TARGET_SECONDARY_RELOAD hook: return the class of an intermediate
   register needed to move X of MODE into (IN_P) or out of (!IN_P) a
   register of class RCLASS, or NO_REGS when none is needed.
   NOTE(review): listing is gapped; the regno initialization and final
   return are not visible.  */
25301 static enum reg_class
25302 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25303 enum machine_mode mode,
25304 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25306 /* QImode spills from non-QI registers require
25307 intermediate register on 32bit targets. */
25308 if (!in_p && mode == QImode && !TARGET_64BIT
25309 && (rclass == GENERAL_REGS
25310 || rclass == LEGACY_REGS
25311 || rclass == INDEX_REGS))
/* Resolve pseudos and subregs to the underlying hard register.  */
25320 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25321 regno = true_regnum (x);
25323 /* Return Q_REGS if the operand is in memory. */
25331 /* If we are copying between general and FP registers, we need a memory
25332 location. The same is true for SSE and MMX registers.
25334 To optimize register_move_cost performance, allow inline variant.
25336 The macro can't work reliably when one of the CLASSES is class containing
25337 registers from multiple units (SSE, MMX, integer). We avoid this by never
25338 combining those units in single alternative in the machine description.
25339 Ensure that this constraint holds to avoid unexpected surprises.
25341 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25342 enforce these sanity checks. */
/* Returns nonzero when a CLASS1 <-> CLASS2 copy of MODE must go through
   memory.  NOTE(review): listing is gapped; the "static inline" line,
   braces and return values are not visible.  */
25345 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25346 enum machine_mode mode, int strict)
/* Reject mixed-unit classes; in strict mode this is a bug.  */
25348 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25349 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25350 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25351 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25352 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25353 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25355 gcc_assert (!strict);
/* x87 <-> anything-else always goes through memory.  */
25359 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25362 /* ??? This is a lie. We do have moves between mmx/general, and for
25363 mmx/sse2. But by saying we need secondary memory we discourage the
25364 register allocator from using the mmx registers unless needed. */
25365 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25368 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25370 /* SSE1 doesn't have any direct moves from other classes. */
25374 /* If the target says that inter-unit moves are more expensive
25375 than moving through memory, then don't generate them. */
25376 if (!TARGET_INTER_UNIT_MOVES)
25379 /* Between SSE and general, we have moves no larger than word size. */
25380 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for inline_secondary_memory_needed; this is the
   SECONDARY_MEMORY_NEEDED target macro entry point.  */
25388 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25389 enum machine_mode mode, int strict)
25391 return inline_secondary_memory_needed (class1, class2, mode, strict);
25394 /* Return true if the registers in CLASS cannot represent the change from
25395 modes FROM to TO. */
/* CANNOT_CHANGE_MODE_CLASS worker.  NOTE(review): listing is gapped;
   the signature's return-type line, the same-mode early exit and the
   final return are not visible.  */
25398 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25399 enum reg_class regclass)
25404 /* x87 registers can't do subreg at all, as all values are reformatted
25405 to extended precision. */
25406 if (MAYBE_FLOAT_CLASS_P (regclass))
25409 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25411 /* Vector registers do not support QI or HImode loads. If we don't
25412 disallow a change to these modes, reload will assume it's ok to
25413 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25414 the vec_dupv4hi pattern. */
25415 if (GET_MODE_SIZE (from) < 4)
25418 /* Vector registers do not support subreg with nonzero offsets, which
25419 are otherwise valid for integer registers. Since we can't see
25420 whether we have a nonzero offset from here, prohibit all
25421 nonparadoxical subregs changing size. */
25422 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25429 /* Return the cost of moving data of mode M between a
25430 register and memory. A value of 2 is the default; this cost is
25431 relative to those in `REGISTER_MOVE_COST'.
25433 This function is used extensively by register_move_cost that is used to
25434 build tables at startup. Make it inline in this case.
25435 When IN is 2, return maximum of in and out move cost.
25437 If moving between registers and memory is more expensive than
25438 between two registers, you should define this macro to express the
25441 Model also increased moving costs of QImode registers in non
25445 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): listing is gapped -- the IN parameter line, the
   mode-size-to-index switch bodies and several branch keywords are not
   visible here.  */
/* x87 classes: cost from the fp_load/fp_store tables.  */
25449 if (FLOAT_CLASS_P (regclass))
25467 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25468 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: indexed by operand size.  */
25470 if (SSE_CLASS_P (regclass))
25473 switch (GET_MODE_SIZE (mode))
25488 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25489 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: indexed by operand size.  */
25491 if (MMX_CLASS_P (regclass))
25494 switch (GET_MODE_SIZE (mode))
25506 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25507 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* General-purpose registers, by operand size.  */
25509 switch (GET_MODE_SIZE (mode))
/* Byte moves: cheap from QI-capable regs or in 64-bit mode; otherwise
   model the partial-register-stall avoidance via movzbl.  */
25512 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25515 return ix86_cost->int_store[0];
25516 if (TARGET_PARTIAL_REG_DEPENDENCY
25517 && optimize_function_for_speed_p (cfun))
25518 cost = ix86_cost->movzbl_load;
25520 cost = ix86_cost->int_load[0];
25522 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q classes pay extra (+4) for the shift/mask store sequence.  */
25528 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25530 return ix86_cost->movzbl_load;
25532 return ix86_cost->int_store[0] + 4;
25537 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25538 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25540 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25541 if (mode == TFmode)
25544 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25546 cost = ix86_cost->int_load[2];
25548 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
25549 return (cost * (((int) GET_MODE_SIZE (mode)
25550 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* MEMORY_MOVE_COST target macro entry point; out-of-line wrapper
   around inline_memory_move_cost.  */
25555 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25557 return inline_memory_move_cost (mode, regclass, in);
25561 /* Return the cost of moving data from a register in class CLASS1 to
25562 one in class CLASS2.
25564 It is not required that the cost always equal 2 when FROM is the same as TO;
25565 on some machines it is expensive to move between registers if they are not
25566 general registers. */
/* NOTE(review): listing is gapped; the return-type line, braces and
   some returns are not visible here.  */
25569 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25570 enum reg_class class2)
25572 /* In case we require secondary memory, compute cost of the store followed
25573 by load. In order to avoid bad register allocation choices, we need
25574 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25576 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 asks for MAX of load and store cost for each side.  */
25580 cost += inline_memory_move_cost (mode, class1, 2);
25581 cost += inline_memory_move_cost (mode, class2, 2);
25583 /* In case of copying from general_purpose_register we may emit multiple
25584 stores followed by single load causing memory size mismatch stall.
25585 Count this as arbitrarily high cost of 20. */
25586 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25589 /* In the case of FP/MMX moves, the registers actually overlap, and we
25590 have to switch modes in order to treat them differently. */
25591 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25592 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25598 /* Moves between SSE/MMX and integer unit are expensive. */
25599 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25600 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25602 /* ??? By keeping returned value relatively high, we limit the number
25603 of moves between integer and MMX/SSE registers for all targets.
25604 Additionally, high value prevents problem with x86_modes_tieable_p(),
25605 where integer modes in MMX/SSE registers are not tieable
25606 because of missing QImode and HImode moves to, from or between
25607 MMX/SSE registers. */
25608 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Same-unit moves: use the per-unit tuned cost.  */
25610 if (MAYBE_FLOAT_CLASS_P (class1))
25611 return ix86_cost->fp_move;
25612 if (MAYBE_SSE_CLASS_P (class1))
25613 return ix86_cost->sse_move;
25614 if (MAYBE_MMX_CLASS_P (class1))
25615 return ix86_cost->mmx_move;
25619 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* HARD_REGNO_MODE_OK worker.  NOTE(review): listing is gapped; the
   return-type line, braces and some returns are not visible.  */
25622 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25624 /* Flags and only flags can only hold CCmode values. */
25625 if (CC_REGNO_P (regno))
25626 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC / random / partial-int modes never live in other registers.  */
25627 if (GET_MODE_CLASS (mode) == MODE_CC
25628 || GET_MODE_CLASS (mode) == MODE_RANDOM
25629 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25631 if (FP_REGNO_P (regno))
25632 return VALID_FP_MODE_P (mode);
25633 if (SSE_REGNO_P (regno))
25635 /* We implement the move patterns for all vector modes into and
25636 out of SSE registers, even when no operation instructions
25637 are available. OImode move is available only when AVX is
25639 return ((TARGET_AVX && mode == OImode)
25640 || VALID_AVX256_REG_MODE (mode)
25641 || VALID_SSE_REG_MODE (mode)
25642 || VALID_SSE2_REG_MODE (mode)
25643 || VALID_MMX_REG_MODE (mode)
25644 || VALID_MMX_REG_MODE_3DNOW (mode));
25646 if (MMX_REGNO_P (regno))
25648 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25649 so if the register is available at all, then we can move data of
25650 the given mode into or out of it. */
25651 return (VALID_MMX_REG_MODE (mode)
25652 || VALID_MMX_REG_MODE_3DNOW (mode));
/* Remaining cases: general-purpose registers.  */
25655 if (mode == QImode)
25657 /* Take care for QImode values - they can be in non-QI regs,
25658 but then they do cause partial register stalls. */
25659 if (regno < 4 || TARGET_64BIT)
25661 if (!TARGET_PARTIAL_REG_STALL)
/* With partial-reg stalls, only allow QI in high regs during reload.  */
25663 return reload_in_progress || reload_completed;
25665 /* We handle both integer and floats in the general purpose registers. */
25666 else if (VALID_INT_MODE_P (mode))
25668 else if (VALID_FP_MODE_P (mode))
25670 else if (VALID_DFP_MODE_P (mode))
25672 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25673 on to use that value in smaller contexts, this can easily force a
25674 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25675 supporting DImode, allow it. */
25676 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25682 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25683 tieable integer mode. */
/* NOTE(review): listing is gapped; the signature and mode case labels
   are not visible here.  */
25686 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* Byte-sized modes tie freely only where partial-register stalls are
   not a concern.  */
25695 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* 8-byte integer modes tie only on 64-bit targets.  */
25698 return TARGET_64BIT;
25705 /* Return true if MODE1 is accessible in a register that can hold MODE2
25706 without copying. That is, all register classes that can hold MODE2
25707 can also hold MODE1. */
/* MODES_TIEABLE_P worker.  NOTE(review): listing is gapped; the
   return-type line and final return are not visible.  */
25710 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25712 if (mode1 == mode2)
25715 if (ix86_tieable_integer_mode_p (mode1)
25716 && ix86_tieable_integer_mode_p (mode2))
25719 /* MODE2 being XFmode implies fp stack or general regs, which means we
25720 can tie any smaller floating point modes to it. Note that we do not
25721 tie this with TFmode. */
25722 if (mode2 == XFmode)
25723 return mode1 == SFmode || mode1 == DFmode;
25725 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25726 that we can tie it with SFmode. */
25727 if (mode2 == DFmode)
25728 return mode1 == SFmode;
25730 /* If MODE2 is only appropriate for an SSE register, then tie with
25731 any other mode acceptable to SSE registers. */
25732 if (GET_MODE_SIZE (mode2) == 16
25733 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25734 return (GET_MODE_SIZE (mode1) == 16
25735 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25737 /* If MODE2 is appropriate for an MMX register, then tie
25738 with any other mode acceptable to MMX registers. */
25739 if (GET_MODE_SIZE (mode2) == 8
25740 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25741 return (GET_MODE_SIZE (mode1) == 8
25742 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25747 /* Compute a (partial) cost for rtx X. Return true if the complete
25748 cost has been computed, and false if subexpressions should be
25749 scanned. In either case, *TOTAL contains the cost result. */
25752 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25754 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25755 enum machine_mode mode = GET_MODE (x);
25756 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
25764 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25766 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25768 else if (flag_pic && SYMBOLIC_CONST (x)
25770 || (!GET_CODE (x) != LABEL_REF
25771 && (GET_CODE (x) != SYMBOL_REF
25772 || !SYMBOL_REF_LOCAL_P (x)))))
25779 if (mode == VOIDmode)
25782 switch (standard_80387_constant_p (x))
25787 default: /* Other constants */
25792 /* Start with (MEM (SYMBOL_REF)), since that's where
25793 it'll probably end up. Add a penalty for size. */
25794 *total = (COSTS_N_INSNS (1)
25795 + (flag_pic != 0 && !TARGET_64BIT)
25796 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25802 /* The zero extensions is often completely free on x86_64, so make
25803 it as cheap as possible. */
25804 if (TARGET_64BIT && mode == DImode
25805 && GET_MODE (XEXP (x, 0)) == SImode)
25807 else if (TARGET_ZERO_EXTEND_WITH_AND)
25808 *total = cost->add;
25810 *total = cost->movzx;
25814 *total = cost->movsx;
25818 if (CONST_INT_P (XEXP (x, 1))
25819 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25821 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25824 *total = cost->add;
25827 if ((value == 2 || value == 3)
25828 && cost->lea <= cost->shift_const)
25830 *total = cost->lea;
25840 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25842 if (CONST_INT_P (XEXP (x, 1)))
25844 if (INTVAL (XEXP (x, 1)) > 32)
25845 *total = cost->shift_const + COSTS_N_INSNS (2);
25847 *total = cost->shift_const * 2;
25851 if (GET_CODE (XEXP (x, 1)) == AND)
25852 *total = cost->shift_var * 2;
25854 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
25859 if (CONST_INT_P (XEXP (x, 1)))
25860 *total = cost->shift_const;
25862 *total = cost->shift_var;
25867 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25869 /* ??? SSE scalar cost should be used here. */
25870 *total = cost->fmul;
25873 else if (X87_FLOAT_MODE_P (mode))
25875 *total = cost->fmul;
25878 else if (FLOAT_MODE_P (mode))
25880 /* ??? SSE vector cost should be used here. */
25881 *total = cost->fmul;
25886 rtx op0 = XEXP (x, 0);
25887 rtx op1 = XEXP (x, 1);
25889 if (CONST_INT_P (XEXP (x, 1)))
25891 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25892 for (nbits = 0; value != 0; value &= value - 1)
25896 /* This is arbitrary. */
25899 /* Compute costs correctly for widening multiplication. */
25900 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25901 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25902 == GET_MODE_SIZE (mode))
25904 int is_mulwiden = 0;
25905 enum machine_mode inner_mode = GET_MODE (op0);
25907 if (GET_CODE (op0) == GET_CODE (op1))
25908 is_mulwiden = 1, op1 = XEXP (op1, 0);
25909 else if (CONST_INT_P (op1))
25911 if (GET_CODE (op0) == SIGN_EXTEND)
25912 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25915 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
25919 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25922 *total = (cost->mult_init[MODE_INDEX (mode)]
25923 + nbits * cost->mult_bit
25924 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
25933 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25934 /* ??? SSE cost should be used here. */
25935 *total = cost->fdiv;
25936 else if (X87_FLOAT_MODE_P (mode))
25937 *total = cost->fdiv;
25938 else if (FLOAT_MODE_P (mode))
25939 /* ??? SSE vector cost should be used here. */
25940 *total = cost->fdiv;
25942 *total = cost->divide[MODE_INDEX (mode)];
25946 if (GET_MODE_CLASS (mode) == MODE_INT
25947 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
25949 if (GET_CODE (XEXP (x, 0)) == PLUS
25950 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25951 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25952 && CONSTANT_P (XEXP (x, 1)))
25954 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25955 if (val == 2 || val == 4 || val == 8)
25957 *total = cost->lea;
25958 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25959 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25960 outer_code, speed);
25961 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25965 else if (GET_CODE (XEXP (x, 0)) == MULT
25966 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25968 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25969 if (val == 2 || val == 4 || val == 8)
25971 *total = cost->lea;
25972 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25973 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25977 else if (GET_CODE (XEXP (x, 0)) == PLUS)
25979 *total = cost->lea;
25980 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25981 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25982 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25989 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25991 /* ??? SSE cost should be used here. */
25992 *total = cost->fadd;
25995 else if (X87_FLOAT_MODE_P (mode))
25997 *total = cost->fadd;
26000 else if (FLOAT_MODE_P (mode))
26002 /* ??? SSE vector cost should be used here. */
26003 *total = cost->fadd;
26011 if (!TARGET_64BIT && mode == DImode)
26013 *total = (cost->add * 2
26014 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26015 << (GET_MODE (XEXP (x, 0)) != DImode))
26016 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26017 << (GET_MODE (XEXP (x, 1)) != DImode)));
26023 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26025 /* ??? SSE cost should be used here. */
26026 *total = cost->fchs;
26029 else if (X87_FLOAT_MODE_P (mode))
26031 *total = cost->fchs;
26034 else if (FLOAT_MODE_P (mode))
26036 /* ??? SSE vector cost should be used here. */
26037 *total = cost->fchs;
26043 if (!TARGET_64BIT && mode == DImode)
26044 *total = cost->add * 2;
26046 *total = cost->add;
26050 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26051 && XEXP (XEXP (x, 0), 1) == const1_rtx
26052 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26053 && XEXP (x, 1) == const0_rtx)
26055 /* This kind of construct is implemented using test[bwl].
26056 Treat it as if we had an AND. */
26057 *total = (cost->add
26058 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26059 + rtx_cost (const1_rtx, outer_code, speed));
26065 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26070 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26071 /* ??? SSE cost should be used here. */
26072 *total = cost->fabs;
26073 else if (X87_FLOAT_MODE_P (mode))
26074 *total = cost->fabs;
26075 else if (FLOAT_MODE_P (mode))
26076 /* ??? SSE vector cost should be used here. */
26077 *total = cost->fabs;
26081 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26082 /* ??? SSE cost should be used here. */
26083 *total = cost->fsqrt;
26084 else if (X87_FLOAT_MODE_P (mode))
26085 *total = cost->fsqrt;
26086 else if (FLOAT_MODE_P (mode))
26087 /* ??? SSE vector cost should be used here. */
26088 *total = cost->fsqrt;
26092 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
26103 static int current_machopic_label_num;
26105 /* Given a symbol name and its associated stub, write out the
26106 definition of the stub. */
/* Emits the Darwin lazy-binding stub for SYMB into FILE: the stub
   itself, the binder helper and the lazy pointer.  32-bit only.
   NOTE(review): listing is gapped; the MACHOPIC_PURE conditionals and
   braces are not visible here.  */
26109 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26111 unsigned int length;
26112 char *binder_name, *symbol_name, lazy_ptr_name[32];
26113 int label = ++current_machopic_label_num;
26115 /* For 64-bit we shouldn't get here. */
26116 gcc_assert (!TARGET_64BIT);
26118 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26119 symb = (*targetm.strip_name_encoding) (symb);
26121 length = strlen (stub);
26122 binder_name = XALLOCAVEC (char, length + 32);
26123 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26125 length = strlen (symb);
26126 symbol_name = XALLOCAVEC (char, length + 32);
26127 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26129 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pure (pic-base-relative) vs. non-pure stub sections.  */
26132 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26134 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26136 fprintf (file, "%s:\n", stub);
26137 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC form: materialize the pic base in %eax, then indirect jump
   through the lazy pointer.  */
26141 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26142 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26143 fprintf (file, "\tjmp\t*%%edx\n");
26146 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer address and enter dyld.  */
26148 fprintf (file, "%s:\n", binder_name);
26152 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26153 fprintf (file, "\tpushl\t%%eax\n");
26156 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26158 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder.  */
26160 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26161 fprintf (file, "%s:\n", lazy_ptr_name);
26162 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26163 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: delegate to the generic
   Darwin end-of-file handling.  */
26167 darwin_x86_file_end (void)
26169 darwin_file_end ();
26174 /* Order the registers for register allocator. */
26177 x86_order_regs_for_local_alloc (void)
26182 /* First allocate the local general purpose registers. */
26183 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26184 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26185 reg_alloc_order [pos++] = i;
26187 /* Global general purpose registers. */
26188 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26189 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26190 reg_alloc_order [pos++] = i;
26192 /* x87 registers come first in case we are doing FP math
26194 if (!TARGET_SSE_MATH)
26195 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26196 reg_alloc_order [pos++] = i;
26198 /* SSE registers. */
26199 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26200 reg_alloc_order [pos++] = i;
26201 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26202 reg_alloc_order [pos++] = i;
26204 /* x87 registers. */
26205 if (TARGET_SSE_MATH)
26206 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26207 reg_alloc_order [pos++] = i;
26209 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26210 reg_alloc_order [pos++] = i;
26212 /* Initialize the rest of array as we do not allocate some registers
26214 while (pos < FIRST_PSEUDO_REGISTER)
26215 reg_alloc_order [pos++] = 0;
26218 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26219 struct attribute_spec.handler. */
/* Validates placement and mutual exclusion of the two calling-ABI
   attributes; sets *NO_ADD_ATTRS on rejection.  NOTE(review): listing
   is gapped; the 64-bit check condition and returns are not visible.  */
26221 ix86_handle_abi_attribute (tree *node, tree name,
26222 tree args ATTRIBUTE_UNUSED,
26223 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Only meaningful on function-ish declarations/types.  */
26225 if (TREE_CODE (*node) != FUNCTION_TYPE
26226 && TREE_CODE (*node) != METHOD_TYPE
26227 && TREE_CODE (*node) != FIELD_DECL
26228 && TREE_CODE (*node) != TYPE_DECL)
26230 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26231 IDENTIFIER_POINTER (name));
26232 *no_add_attrs = true;
26237 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26238 IDENTIFIER_POINTER (name));
26239 *no_add_attrs = true;
26243 /* Can combine regparm with all attributes but fastcall. */
26244 if (is_attribute_p ("ms_abi", name))
26246 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26248 error ("ms_abi and sysv_abi attributes are not compatible");
26253 else if (is_attribute_p ("sysv_abi", name))
26255 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26257 error ("ms_abi and sysv_abi attributes are not compatible");
26266 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26267 struct attribute_spec.handler. */
/* Accepts only record/union types (directly or via a TYPE_DECL) and
   rejects mixing ms_struct with gcc_struct.  NOTE(review): listing is
   gapped; the else branch assigning TYPE and returns are not
   visible.  */
26269 ix86_handle_struct_attribute (tree *node, tree name,
26270 tree args ATTRIBUTE_UNUSED,
26271 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Resolve the type the attribute is being applied to.  */
26274 if (DECL_P (*node))
26276 if (TREE_CODE (*node) == TYPE_DECL)
26277 type = &TREE_TYPE (*node);
26282 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26283 || TREE_CODE (*type) == UNION_TYPE)))
26285 warning (OPT_Wattributes, "%qs attribute ignored",
26286 IDENTIFIER_POINTER (name));
26287 *no_add_attrs = true;
/* The two layout attributes are mutually exclusive.  */
26290 else if ((is_attribute_p ("ms_struct", name)
26291 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26292 || ((is_attribute_p ("gcc_struct", name)
26293 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26295 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26296 IDENTIFIER_POINTER (name));
26297 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: use MS bit-field layout when the
   target default says so and the record is not marked gcc_struct, or
   when the record is explicitly marked ms_struct.  */
26304 ix86_ms_bitfield_layout_p (const_tree record_type)
26306 return (TARGET_MS_BITFIELD_LAYOUT &&
26307 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26308 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26311 /* Returns an expression indicating where the this parameter is
26312 located on entry to the FUNCTION. */
/* Returns either a REG (when `this' arrives in a register) or a MEM
   on the stack.  AGGR shifts `this' one slot when the return value is
   passed by hidden reference.  NOTE(review): listing is gapped; the
   TARGET_64BIT test, braces and regparm slot selection are not fully
   visible.  */
26315 x86_this_parameter (tree function)
26317 tree type = TREE_TYPE (function);
/* True when the return value is an aggregate returned in memory, which
   occupies the first (hidden) argument slot.  */
26318 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26323 const int *parm_regs;
/* 64-bit: `this' is in the first or second integer argument register
   of whichever ABI the function uses.  */
26325 if (ix86_function_type_abi (type) == MS_ABI)
26326 parm_regs = x86_64_ms_abi_int_parameter_registers;
26328 parm_regs = x86_64_int_parameter_registers;
26329 return gen_rtx_REG (DImode, parm_regs[aggr]);
26332 nregs = ix86_function_regparm (type, function);
26334 if (nregs > 0 && !stdarg_p (type))
/* fastcall passes the first args in ecx/edx.  */
26338 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26339 regno = aggr ? DX_REG : CX_REG;
26347 return gen_rtx_MEM (SImode,
26348 plus_constant (stack_pointer_rtx, 4));
26351 return gen_rtx_REG (SImode, regno);
/* Default 32-bit case: `this' is on the stack just above the return
   address (one slot further when an aggregate return slot precedes
   it).  */
26354 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26357 /* Determine whether x86_output_mi_thunk can succeed. */
26360 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26361 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26362 HOST_WIDE_INT vcall_offset, const_tree function)
26364 /* 64-bit can handle anything. */
/* NOTE(review): the actual `return true' lines are elided in this view;
   only the guard conditions remain.  */
26368 /* For 32-bit, everything's fine if we have one free register. */
26369 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26372 /* Need a free register for vcall_offset. */
26376 /* Need a free register for GOT references. */
26377 if (flag_pic && !(*targetm.binds_local_p) (function))
26380 /* Otherwise ok. */
/* NOTE(review): elided extract -- TARGET_64BIT guards, braces, and several
   intermediate statements are missing from this view.  */
26384 /* Output the assembler code for a thunk function. THUNK_DECL is the
26385 declaration for the thunk function itself, FUNCTION is the decl for
26386 the target function. DELTA is an immediate constant offset to be
26387 added to THIS. If VCALL_OFFSET is nonzero, the word at
26388 *(*this + vcall_offset) should be added to THIS. */
26391 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26392 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26393 HOST_WIDE_INT vcall_offset, tree function)
26396 rtx this_param = x86_this_parameter (function);
26399 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26400 pull it in now and let DELTA benefit. */
26401 if (REG_P (this_param))
26402 this_reg = this_param;
26403 else if (vcall_offset)
26405 /* Put the this parameter into %eax. */
26406 xops[0] = this_param;
26407 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26408 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26411 this_reg = NULL_RTX;
26413 /* Adjust the this parameter by a fixed constant. */
26416 xops[0] = GEN_INT (delta);
26417 xops[1] = this_reg ? this_reg : this_param;
/* On 64-bit a delta outside the signed-32-bit immediate range must be
   materialized in a scratch register (%r10) first.  */
26420 if (!x86_64_general_operand (xops[0], DImode))
26422 tmp = gen_rtx_REG (DImode, R10_REG);
26424 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26426 xops[1] = this_param;
26428 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26431 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26434 /* Adjust the this parameter by a value stored in the vtable. */
26438 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit scratch: use ECX, or EAX for fastcall where ECX carries an arg.  */
26441 int tmp_regno = CX_REG;
26442 if (lookup_attribute ("fastcall",
26443 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26444 tmp_regno = AX_REG;
26445 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer: tmp = *this.  */
26448 xops[0] = gen_rtx_MEM (Pmode, this_reg);
26450 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26452 /* Adjust the this parameter. */
26453 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* Large vcall_offset on 64-bit: compute the address via %r11.  */
26454 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26456 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26457 xops[0] = GEN_INT (vcall_offset);
26459 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26460 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26462 xops[1] = this_reg;
26463 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26466 /* If necessary, drop THIS back to its stack slot. */
26467 if (this_reg && this_reg != this_param)
26469 xops[0] = this_reg;
26470 xops[1] = this_param;
26471 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real target function.  */
26474 xops[0] = XEXP (DECL_RTL (function), 0);
26477 if (!flag_pic || (*targetm.binds_local_p) (function))
26478 output_asm_insn ("jmp\t%P0", xops);
26479 /* All thunks should be in the same object as their target,
26480 and thus binds_local_p should be true. */
26481 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26482 gcc_unreachable ();
/* 64-bit PIC: jump through the GOT entry.  */
26485 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26486 tmp = gen_rtx_CONST (Pmode, tmp);
26487 tmp = gen_rtx_MEM (QImode, tmp);
26489 output_asm_insn ("jmp\t%A0", xops);
26494 if (!flag_pic || (*targetm.binds_local_p) (function))
26495 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump through the machopic indirection stub.  */
26500 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26501 tmp = (gen_rtx_SYMBOL_REF
26503 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26504 tmp = gen_rtx_MEM (QImode, tmp);
26506 output_asm_insn ("jmp\t%0", xops);
26509 #endif /* TARGET_MACHO */
/* Generic 32-bit PIC: set up the GOT pointer in %ecx and jump through
   the function's GOT slot.  */
26511 tmp = gen_rtx_REG (SImode, CX_REG);
26512 output_set_got (tmp, NULL_RTX);
26515 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26516 output_asm_insn ("jmp\t{*}%1", xops);
/* Emit the per-file assembler prologue: version directive, __fltused
   marker, and Intel-syntax switch, as the target configuration requires.  */
26522 x86_file_start (void)
26524 default_file_start ();
/* NOTE(review): the TARGET_MACHO guard around this call is elided here.  */
26526 darwin_file_start ();
26528 if (X86_FILE_START_VERSION_DIRECTIVE)
26529 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26530 if (X86_FILE_START_FLTUSED)
26531 fputs ("\t.global\t__fltused\n", asm_out_file);
26532 if (ix86_asm_dialect == ASM_INTEL)
26533 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Return the alignment to use for FIELD, given the COMPUTED default.
   On 32-bit without -malign-double, double/integer-class fields are
   capped at 32-bit alignment for ABI compatibility.  */
26537 x86_field_alignment (tree field, int computed)
26539 enum machine_mode mode;
26540 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the computed alignment unchanged.  */
26542 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* Look through arrays so an array of doubles is capped the same way.  */
26544 mode = TYPE_MODE (strip_array_types (type));
26545 if (mode == DFmode || mode == DCmode
26546 || GET_MODE_CLASS (mode) == MODE_INT
26547 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26548 return MIN (32, computed);
/* NOTE(review): elided extract -- the TARGET_64BIT/flag_pic branch
   structure and #else/#endif lines are missing from this view.  */
26552 /* Output assembler code to FILE to increment profiler label # LABELNO
26553 for profiling a function entry. */
26555 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit: load the counter address into %r11, then call mcount.  */
26559 #ifndef NO_PROFILE_COUNTERS
26560 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
26563 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26564 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26566 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: address the counter and mcount through the GOT (%ebx).  */
26570 #ifndef NO_PROFILE_COUNTERS
26571 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26572 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26574 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute counter address and direct call.  */
26578 #ifndef NO_PROFILE_COUNTERS
26579 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26580 PROFILE_COUNT_REGISTER);
26582 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26586 /* We don't have exact information about the insn sizes, but we may assume
26587 quite safely that we are informed about all 1 byte insns and memory
26588 address sizes. This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound on the byte size of INSN; the
   concrete return statements are elided in this extract.  */
26592 min_insn_size (rtx insn)
26596 if (!INSN_P (insn) || !active_insn_p (insn))
26599 /* Discard alignments we've emit and jump instructions. */
26600 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26601 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26604 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26605 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26608 /* Important case - calls are always 5 bytes.
26609 It is common to have many calls in the row. */
26611 && symbolic_reference_mentioned_p (PATTERN (insn))
26612 && !SIBLING_CALL_P (insn))
26614 if (get_attr_length (insn) <= 1)
26617 /* For normal instructions we may rely on the sizes of addresses
26618 and the presence of symbol to require 4 bytes of encoding.
26619 This is not the case for jumps where references are PC relative. */
26620 if (!JUMP_P (insn))
26622 l = get_attr_length_address (insn);
26623 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
/* NOTE(review): elided extract -- loop braces, the dump_file guards and
   several conditions are missing from this view.  */
26632 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
26636 ix86_avoid_jump_misspredicts (void)
26638 rtx insn, start = get_insns ();
26639 int nbytes = 0, njumps = 0;
26642 /* Look for all minimal intervals of instructions containing 4 jumps.
26643 The intervals are bounded by START and INSN. NBYTES is the total
26644 size of instructions in the interval including INSN and not including
26645 START. When the NBYTES is smaller than 16 bytes, it is possible
26646 that the end of START and INSN ends up in the same 16byte page.
26648 The smallest offset in the page INSN can start is the case where START
26649 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
26650 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
26652 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26655 nbytes += min_insn_size (insn);
26657 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26658 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps only; jump tables (ADDR_VEC) do not mispredict.  */
26660 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26661 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front until it holds at most 4 jumps.  */
26669 start = NEXT_INSN (start);
26670 if ((JUMP_P (start)
26671 && GET_CODE (PATTERN (start)) != ADDR_VEC
26672 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26674 njumps--, isjump = 1;
26677 nbytes -= min_insn_size (start);
26679 gcc_assert (njumps >= 0);
26681 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26682 INSN_UID (start), INSN_UID (insn), nbytes);
/* Four jumps could land in one 16-byte window: pad before INSN.  */
26684 if (njumps == 3 && isjump && nbytes < 16)
26686 int padsize = 15 - nbytes + min_insn_size (insn);
26689 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26690 INSN_UID (insn), padsize);
26691 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
/* NOTE(review): elided extract -- `continue'/`replace = true' statements
   and several braces are missing from this view.  */
26696 /* AMD Athlon works faster
26697 when RET is not destination of conditional jump or directly preceded
26698 by other jump instruction. We avoid the penalty by inserting NOP just
26699 before the RET instructions in such cases. */
26701 ix86_pad_returns (void)
/* Walk every edge into the exit block; each source BB may end in a RET.  */
26706 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26708 basic_block bb = e->src;
26709 rtx ret = BB_END (bb);
26711 bool replace = false;
26713 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26714 || optimize_bb_for_size_p (bb))
/* Find the nearest preceding active insn or label before the RET.  */
26716 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26717 if (active_insn_p (prev) || LABEL_P (prev))
/* A label right before RET: check whether any predecessor jumps here.  */
26719 if (prev && LABEL_P (prev))
26724 FOR_EACH_EDGE (e, ei, bb->preds)
26725 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26726 && !(e->flags & EDGE_FALLTHRU))
26731 prev = prev_active_insn (ret);
26733 && ((JUMP_P (prev) && any_condjump_p (prev))
26736 /* Empty functions get branch mispredict even when the jump destination
26737 is not visible to us. */
26738 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace the short RET with the long form to insert the padding.  */
26743 emit_insn_before (gen_return_internal_long (), ret);
26749 /* Implement machine specific optimizations. We implement padding of returns
26750 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header line itself is elided in this view.  */
26754 if (TARGET_PAD_RETURNS && optimize
26755 && optimize_function_for_speed_p (cfun))
26756 ix86_pad_returns ();
26757 if (TARGET_FOUR_JUMP_LIMIT && optimize
26758 && optimize_function_for_speed_p (cfun))
26759 ix86_avoid_jump_misspredicts ();
26762 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the extracted operands of INSN; registers numbered >= 4 in QImode
   need a REX prefix in 64-bit mode.  The QImode check and the return
   statements are elided in this extract.  */
26765 x86_extended_QIreg_mentioned_p (rtx insn)
26768 extract_insn_cached (insn);
26769 for (i = 0; i < recog_data.n_operands; i++)
26770 if (REG_P (recog_data.operand[i])
26771 && REGNO (recog_data.operand[i]) >= 4)
26776 /* Return nonzero when P points to register encoded via REX prefix.
26777 Called via for_each_rtx. */
26779 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26781 unsigned int regno;
/* NOTE(review): the REG_P guard preceding this deref is elided here.  */
26784 regno = REGNO (*p);
26785 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26788 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the insn pattern (or the bare rtx for non-insns) with
   extended_reg_mentioned_1 above.  */
26791 x86_extended_reg_mentioned_p (rtx insn)
26793 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26794 extended_reg_mentioned_1, NULL);
26797 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
26798 optabs would emit if we didn't have TFmode patterns. */
26801 x86_emit_floatuns (rtx operands[2])
26803 rtx neglab, donelab, i0, i1, f0, in, out;
26804 enum machine_mode mode, inmode;
26806 inmode = GET_MODE (operands[1]);
26807 gcc_assert (inmode == SImode || inmode == DImode);
26810 in = force_reg (inmode, operands[1]);
26811 mode = GET_MODE (out);
26812 neglab = gen_label_rtx ();
26813 donelab = gen_label_rtx ();
26814 f0 = gen_reg_rtx (mode);
/* Nonnegative input: a plain signed conversion is already correct.  */
26816 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26818 expand_float (out, in, 0);
26820 emit_jump_insn (gen_jump (donelab));
26823 emit_label (neglab);
/* Negative (high-bit-set) input: halve it while preserving the low bit
   ((in >> 1) | (in & 1)), convert, then double the result.  */
26825 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26827 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26829 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26831 expand_float (f0, i0, 0);
26833 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26835 emit_label (donelab);
/* NOTE(review): elided extract -- the switch over MODE, case labels and
   return statements are missing from this view.  */
26838 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26839 with all elements equal to VAR. Return true if successful. */
26842 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26843 rtx target, rtx val)
26845 enum machine_mode hmode, smode, wsmode, wvmode;
/* Simple case: the target has a native VEC_DUPLICATE for this mode.  */
26860 val = force_reg (GET_MODE_INNER (mode), val);
26861 x = gen_rtx_VEC_DUPLICATE (mode, val);
26862 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* HImode elements via pshufw-style duplicate (SSE / 3DNow!A).  */
26868 if (TARGET_SSE || TARGET_3DNOW_A)
26870 val = gen_lowpart (SImode, val);
26871 x = gen_rtx_TRUNCATE (HImode, val);
26872 x = gen_rtx_VEC_DUPLICATE (mode, x);
26873 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V8HImode broadcast without direct support: widen, punpcklwd, pshufd.  */
26895 /* Extend HImode to SImode using a paradoxical SUBREG. */
26896 tmp1 = gen_reg_rtx (SImode);
26897 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26898 /* Insert the SImode value as low element of V4SImode vector. */
26899 tmp2 = gen_reg_rtx (V4SImode);
26900 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26901 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26902 CONST0_RTX (V4SImode),
26904 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26905 /* Cast the V4SImode vector back to a V8HImode vector. */
26906 tmp1 = gen_reg_rtx (V8HImode);
26907 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
26908 /* Duplicate the low short through the whole low SImode word. */
26909 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
26910 /* Cast the V8HImode vector back to a V4SImode vector. */
26911 tmp2 = gen_reg_rtx (V4SImode);
26912 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26913 /* Replicate the low element of the V4SImode vector. */
26914 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26915 /* Cast the V2SImode back to V8HImode, and store in target. */
26916 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
/* V16QImode broadcast: same idea, two punpcklbw rounds for bytes.  */
26927 /* Extend QImode to SImode using a paradoxical SUBREG. */
26928 tmp1 = gen_reg_rtx (SImode);
26929 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26930 /* Insert the SImode value as low element of V4SImode vector. */
26931 tmp2 = gen_reg_rtx (V4SImode);
26932 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26933 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26934 CONST0_RTX (V4SImode),
26936 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26937 /* Cast the V4SImode vector back to a V16QImode vector. */
26938 tmp1 = gen_reg_rtx (V16QImode);
26939 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
26940 /* Duplicate the low byte through the whole low SImode word. */
26941 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26942 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26943 /* Cast the V16QImode vector back to a V4SImode vector. */
26944 tmp2 = gen_reg_rtx (V4SImode);
26945 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26946 /* Replicate the low element of the V4SImode vector. */
26947 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26948 /* Cast the V2SImode back to V16QImode, and store in target. */
26949 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
/* Fallback: smear the scalar into a wider scalar, then recurse with the
   wider vector mode (half as many, twice as wide elements).  */
26957 /* Replicate the value once into the next wider mode and recurse. */
26958 val = convert_modes (wsmode, smode, val, true);
26959 x = expand_simple_binop (wsmode, ASHIFT, val,
26960 GEN_INT (GET_MODE_BITSIZE (smode)),
26961 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26962 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26964 x = gen_reg_rtx (wvmode);
26965 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
26966 gcc_unreachable ();
26967 emit_move_insn (target, gen_lowpart (mode, x));
/* 256-bit modes: duplicate into the half-width mode and concatenate.  */
26990 rtx tmp = gen_reg_rtx (hmode);
26991 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
26992 emit_insn (gen_rtx_SET (VOIDmode, target,
26993 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
/* NOTE(review): elided extract -- the switch over MODE and many case
   labels/braces are missing from this view.  */
27002 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27003 whose ONE_VAR element is VAR, and other elements are zero. Return true
27007 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27008 rtx target, rtx var, int one_var)
27010 enum machine_mode vsimode;
27013 bool use_vector_set = false;
/* Decide per mode whether a direct vec_set into a zeroed vector works.  */
27018 /* For SSE4.1, we normally use vector set. But if the second
27019 element is zero and inter-unit moves are OK, we use movq
27021 use_vector_set = (TARGET_64BIT
27023 && !(TARGET_INTER_UNIT_MOVES
27029 use_vector_set = TARGET_SSE4_1;
27032 use_vector_set = TARGET_SSE2;
27035 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27043 use_vector_set = TARGET_AVX;
27049 if (use_vector_set)
/* Zero the whole vector, then insert VAR at ONE_VAR.  */
27051 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27052 var = force_reg (GET_MODE_INNER (mode), var);
27053 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case with VAR in the low half: concat VAR with zero.  */
27069 var = force_reg (GET_MODE_INNER (mode), var);
27070 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27071 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V4SF/V4SI: build {var,0,0,0} with a merge, then shuffle VAR into
   position ONE_VAR.  Use a pseudo when TARGET is a hard register.  */
27076 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27077 new_target = gen_reg_rtx (mode);
27079 new_target = target;
27080 var = force_reg (GET_MODE_INNER (mode), var);
27081 x = gen_rtx_VEC_DUPLICATE (mode, var);
27082 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27083 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27086 /* We need to shuffle the value to the correct position, so
27087 create a new pseudo to store the intermediate result. */
27089 /* With SSE2, we can use the integer shuffle insns. */
27090 if (mode != V4SFmode && TARGET_SSE2)
27092 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27094 GEN_INT (one_var == 1 ? 0 : 1),
27095 GEN_INT (one_var == 2 ? 0 : 1),
27096 GEN_INT (one_var == 3 ? 0 : 1)));
27097 if (target != new_target)
27098 emit_move_insn (target, new_target);
27102 /* Otherwise convert the intermediate result to V4SFmode and
27103 use the SSE1 shuffle instructions. */
27104 if (mode != V4SFmode)
27106 tmp = gen_reg_rtx (V4SFmode);
27107 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27112 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27114 GEN_INT (one_var == 1 ? 0 : 1),
27115 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27116 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27118 if (mode != V4SFmode)
27119 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27120 else if (tmp != target)
27121 emit_move_insn (target, tmp);
27123 else if (target != new_target)
27124 emit_move_insn (target, new_target);
/* Narrow-element modes: zero-extend VAR to SImode and recurse in the
   corresponding SImode vector mode, then cast the result back.  */
27129 vsimode = V4SImode;
27135 vsimode = V2SImode;
27141 /* Zero extend the variable element to SImode and recurse. */
27142 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27144 x = gen_reg_rtx (vsimode);
27145 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27147 gcc_unreachable ();
27149 emit_move_insn (target, gen_lowpart (mode, x));
/* NOTE(review): elided extract -- the switch over MODE and several case
   labels are missing from this view.  */
27157 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27158 consisting of the values in VALS. It is known that all elements
27159 except ONE_VAR are constants. Return true if successful. */
27162 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27163 rtx target, rtx vals, int one_var)
27165 rtx var = XVECEXP (vals, 0, one_var);
27166 enum machine_mode wmode;
/* Build the constant vector with the variable slot zeroed out.  */
27169 const_vec = copy_rtx (vals);
27170 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27171 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27179 /* For the two element vectors, it's just as easy to use
27180 the general case. */
27204 /* There's no way to set one QImode entry easily. Combine
27205 the variable value with its adjacent constant value, and
27206 promote to an HImode set. */
27207 x = XVECEXP (vals, 0, one_var ^ 1);
/* Variable byte is the high half of the HI pair: shift it left 8.  */
27210 var = convert_modes (HImode, QImode, var, true);
27211 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27212 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27213 x = GEN_INT (INTVAL (x) & 0xff);
/* Otherwise the constant neighbor occupies the high byte.  */
27217 var = convert_modes (HImode, QImode, var, true);
27218 x = gen_int_mode (INTVAL (x) << 8, HImode);
27220 if (x != const0_rtx)
27221 var = expand_simple_binop (HImode, IOR, var, x, var,
27222 1, OPTAB_LIB_WIDEN);
/* Load the zero-slotted constant vector, then set the combined HImode
   pair at the halved index.  */
27224 x = gen_reg_rtx (wmode);
27225 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27226 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27228 emit_move_insn (target, gen_lowpart (mode, x));
/* Default: load the constants, then overwrite the one variable slot.  */
27235 emit_move_insn (target, const_vec);
27236 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* NOTE(review): elided extract -- the switch over N, the mode-selection
   cases and loop headers are missing from this view.  */
27240 /* A subroutine of ix86_expand_vector_init_general. Use vector
27241 concatenate to handle the most general case: all values variable,
27242 and none identical. */
27245 ix86_expand_vector_init_concat (enum machine_mode mode,
27246 rtx target, rtx *ops, int n)
27248 enum machine_mode cmode, hmode = VOIDmode;
27249 rtx first[8], second[4];
27289 gcc_unreachable ();
/* N == 2: a single VEC_CONCAT of the two (register) operands.  */
27292 if (!register_operand (ops[1], cmode))
27293 ops[1] = force_reg (cmode, ops[1]);
27294 if (!register_operand (ops[0], cmode))
27295 ops[0] = force_reg (cmode, ops[0]);
27296 emit_insn (gen_rtx_SET (VOIDmode, target,
27297 gen_rtx_VEC_CONCAT (mode, ops[0],
27317 gcc_unreachable ();
27333 gcc_unreachable ();
/* N >= 4: pair up inputs into half-size vectors, then recurse.  */
27338 /* FIXME: We process inputs backward to help RA. PR 36222. */
27341 for (; i > 0; i -= 2, j--)
27343 first[j] = gen_reg_rtx (cmode);
27344 v = gen_rtvec (2, ops[i - 1], ops[i]);
27345 ix86_expand_vector_init (false, first[j],
27346 gen_rtx_PARALLEL (cmode, v));
/* Extra level needed (e.g. 8 inputs): combine pairs again in HMODE.  */
27352 gcc_assert (hmode != VOIDmode);
27353 for (i = j = 0; i < n; i += 2, j++)
27355 second[j] = gen_reg_rtx (hmode);
27356 ix86_expand_vector_init_concat (hmode, second [j],
27360 ix86_expand_vector_init_concat (mode, target, second, n);
27363 ix86_expand_vector_init_concat (mode, target, first, n);
27367 gcc_unreachable ();
/* NOTE(review): elided extract -- switch/case/brace lines are missing
   from this view.  */
27371 /* A subroutine of ix86_expand_vector_init_general. Use vector
27372 interleave to handle the most general case: all values variable,
27373 and none identical. */
27376 ix86_expand_vector_init_interleave (enum machine_mode mode,
27377 rtx target, rtx *ops, int n)
27379 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27382 rtx (*gen_load_even) (rtx, rtx, rtx);
27383 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27384 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* V8HImode: two interleave levels (V4SI then V2DI).  */
27389 gen_load_even = gen_vec_setv8hi;
27390 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27391 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27392 inner_mode = HImode;
27393 first_imode = V4SImode;
27394 second_imode = V2DImode;
27395 third_imode = VOIDmode;
/* V16QImode: three interleave levels (V8HI, V4SI, V2DI).  */
27398 gen_load_even = gen_vec_setv16qi;
27399 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27400 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27401 inner_mode = QImode;
27402 first_imode = V8HImode;
27403 second_imode = V4SImode;
27404 third_imode = V2DImode;
27407 gcc_unreachable ();
/* Pack each odd/even pair of scalars into one vector in ops[i].  */
27410 for (i = 0; i < n; i++)
27412 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
27413 op0 = gen_reg_rtx (SImode);
27414 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27416 /* Insert the SImode value as low element of V4SImode vector. */
27417 op1 = gen_reg_rtx (V4SImode);
27418 op0 = gen_rtx_VEC_MERGE (V4SImode,
27419 gen_rtx_VEC_DUPLICATE (V4SImode,
27421 CONST0_RTX (V4SImode),
27423 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27425 /* Cast the V4SImode vector back to a vector in orignal mode. */
27426 op0 = gen_reg_rtx (mode);
27427 emit_move_insn (op0, gen_lowpart (mode, op1));
27429 /* Load even elements into the second positon. */
27430 emit_insn ((*gen_load_even) (op0,
27431 force_reg (inner_mode,
27435 /* Cast vector to FIRST_IMODE vector. */
27436 ops[i] = gen_reg_rtx (first_imode);
27437 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27440 /* Interleave low FIRST_IMODE vectors. */
27441 for (i = j = 0; i < n; i += 2, j++)
27443 op0 = gen_reg_rtx (first_imode);
27444 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27446 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
27447 ops[j] = gen_reg_rtx (second_imode);
27448 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27451 /* Interleave low SECOND_IMODE vectors. */
27452 switch (second_imode)
27455 for (i = j = 0; i < n / 2; i += 2, j++)
27457 op0 = gen_reg_rtx (second_imode);
27458 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27461 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27463 ops[j] = gen_reg_rtx (third_imode);
27464 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* After the V4SI round, fall through to a final V2DI interleave.  */
27466 second_imode = V2DImode;
27467 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27471 op0 = gen_reg_rtx (second_imode);
27472 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27475 /* Cast the SECOND_IMODE vector back to a vector on original
27477 emit_insn (gen_rtx_SET (VOIDmode, target,
27478 gen_lowpart (mode, op0)));
27482 gcc_unreachable ();
/* NOTE(review): elided extract -- the switch over MODE, case labels,
   `break's and braces are missing from this view.  */
27486 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
27487 all values variable, and none identical. */
27490 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27491 rtx target, rtx vals)
27493 rtx ops[32], op0, op1;
27494 enum machine_mode half_mode = VOIDmode;
27501 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes: build by recursive concatenation.  */
27513 n = GET_MODE_NUNITS (mode);
27514 for (i = 0; i < n; i++)
27515 ops[i] = XVECEXP (vals, 0, i);
27516 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit byte/short vectors: interleave each 128-bit half, concat.  */
27520 half_mode = V16QImode;
27524 half_mode = V8HImode;
27528 n = GET_MODE_NUNITS (mode);
27529 for (i = 0; i < n; i++)
27530 ops[i] = XVECEXP (vals, 0, i);
27531 op0 = gen_reg_rtx (half_mode);
27532 op1 = gen_reg_rtx (half_mode);
27533 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27535 ix86_expand_vector_init_interleave (half_mode, op1,
27536 &ops [n >> 1], n >> 2);
27537 emit_insn (gen_rtx_SET (VOIDmode, target,
27538 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 128-bit byte/short vectors need SSE4.1 for the interleave path.  */
27542 if (!TARGET_SSE4_1)
27550 /* Don't use ix86_expand_vector_init_interleave if we can't
27551 move from GPR to SSE register directly. */
27552 if (!TARGET_INTER_UNIT_MOVES)
27555 n = GET_MODE_NUNITS (mode);
27556 for (i = 0; i < n; i++)
27557 ops[i] = XVECEXP (vals, 0, i);
27558 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27566 gcc_unreachable ();
/* Scalar fallback: assemble word_mode words by shift+or, then combine.  */
27570 int i, j, n_elts, n_words, n_elt_per_word;
27571 enum machine_mode inner_mode;
27572 rtx words[4], shift;
27574 inner_mode = GET_MODE_INNER (mode);
27575 n_elts = GET_MODE_NUNITS (mode);
27576 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27577 n_elt_per_word = n_elts / n_words;
27578 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27580 for (i = 0; i < n_words; ++i)
27582 rtx word = NULL_RTX;
/* Elements are packed high-to-low so each shift makes room below.  */
27584 for (j = 0; j < n_elt_per_word; ++j)
27586 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27587 elt = convert_modes (word_mode, inner_mode, elt, true);
27593 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27594 word, 1, OPTAB_LIB_WIDEN);
27595 word = expand_simple_binop (word_mode, IOR, word, elt,
27596 word, 1, OPTAB_LIB_WIDEN);
27604 emit_move_insn (target, gen_lowpart (mode, words[0]));
27605 else if (n_words == 2)
/* Two words: clobber the pseudo, then fill its low and high parts.  */
27607 rtx tmp = gen_reg_rtx (mode);
27608 emit_clobber (tmp);
27609 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27610 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27611 emit_move_insn (target, tmp);
27613 else if (n_words == 4)
/* Four words: recurse as a V4SI build, then cast to MODE.  */
27615 rtx tmp = gen_reg_rtx (V4SImode);
27616 gcc_assert (word_mode == SImode);
27617 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27618 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27619 emit_move_insn (target, gen_lowpart (mode, tmp));
27622 gcc_unreachable ();
27626 /* Initialize vector TARGET via VALS. Suppress the use of MMX
27627 instructions unless MMX_OK is true. */
27630 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27632 enum machine_mode mode = GET_MODE (target);
27633 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27634 int n_elts = GET_MODE_NUNITS (mode);
27635 int n_var = 0, one_var = -1;
27636 bool all_same = true, all_const_zero = true;
/* Classify the elements: count variables, remember the last variable
   index, and note whether everything is zero / everything identical.  */
27640 for (i = 0; i < n_elts; ++i)
27642 x = XVECEXP (vals, 0, i);
27643 if (!(CONST_INT_P (x)
27644 || GET_CODE (x) == CONST_DOUBLE
27645 || GET_CODE (x) == CONST_FIXED))
27646 n_var++, one_var = i;
27647 else if (x != CONST0_RTX (inner_mode))
27648 all_const_zero = false;
27649 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27653 /* Constants are best loaded from the constant pool. */
27656 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27660 /* If all values are identical, broadcast the value. */
27662 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27663 XVECEXP (vals, 0, 0)))
27666 /* Values where only one field is non-constant are best loaded from
27667 the pool and overwritten via move later. */
27671 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27672 XVECEXP (vals, 0, one_var),
27676 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* General case: everything variable, nothing identical.  */
27680 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
27684 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27686 enum machine_mode mode = GET_MODE (target);
27687 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27688 enum machine_mode half_mode;
27689 bool use_vec_merge = false;
27691 static rtx (*gen_extract[6][2]) (rtx, rtx)
27693 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27694 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27695 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27696 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27697 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27698 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27700 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27702 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27703 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27704 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27705 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27706 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27707 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
27717 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27718 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27720 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27722 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27723 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27729 use_vec_merge = TARGET_SSE4_1;
27737 /* For the two element vectors, we implement a VEC_CONCAT with
27738 the extraction of the other element. */
27740 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27741 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27744 op0 = val, op1 = tmp;
27746 op0 = tmp, op1 = val;
27748 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27749 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27754 use_vec_merge = TARGET_SSE4_1;
27761 use_vec_merge = true;
27765 /* tmp = target = A B C D */
27766 tmp = copy_to_reg (target);
27767 /* target = A A B B */
27768 emit_insn (gen_sse_unpcklps (target, target, target));
27769 /* target = X A B B */
27770 ix86_expand_vector_set (false, target, val, 0);
27771 /* target = A X C D */
27772 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27773 GEN_INT (1), GEN_INT (0),
27774 GEN_INT (2+4), GEN_INT (3+4)));
27778 /* tmp = target = A B C D */
27779 tmp = copy_to_reg (target);
27780 /* tmp = X B C D */
27781 ix86_expand_vector_set (false, tmp, val, 0);
27782 /* target = A B X D */
27783 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27784 GEN_INT (0), GEN_INT (1),
27785 GEN_INT (0+4), GEN_INT (3+4)));
27789 /* tmp = target = A B C D */
27790 tmp = copy_to_reg (target);
27791 /* tmp = X B C D */
27792 ix86_expand_vector_set (false, tmp, val, 0);
27793 /* target = A B X D */
27794 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27795 GEN_INT (0), GEN_INT (1),
27796 GEN_INT (2+4), GEN_INT (0+4)));
27800 gcc_unreachable ();
27805 use_vec_merge = TARGET_SSE4_1;
27809 /* Element 0 handled by vec_merge below. */
27812 use_vec_merge = true;
27818 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27819 store into element 0, then shuffle them back. */
27823 order[0] = GEN_INT (elt);
27824 order[1] = const1_rtx;
27825 order[2] = const2_rtx;
27826 order[3] = GEN_INT (3);
27827 order[elt] = const0_rtx;
27829 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27830 order[1], order[2], order[3]));
27832 ix86_expand_vector_set (false, target, val, 0);
27834 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27835 order[1], order[2], order[3]));
27839 /* For SSE1, we have to reuse the V4SF code. */
27840 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27841 gen_lowpart (SFmode, val), elt);
27846 use_vec_merge = TARGET_SSE2;
27849 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27853 use_vec_merge = TARGET_SSE4_1;
27860 half_mode = V16QImode;
27866 half_mode = V8HImode;
27872 half_mode = V4SImode;
27878 half_mode = V2DImode;
27884 half_mode = V4SFmode;
27890 half_mode = V2DFmode;
27896 /* Compute offset. */
27900 gcc_assert (i <= 1);
27902 /* Extract the half. */
27903 tmp = gen_reg_rtx (half_mode);
27904 emit_insn ((*gen_extract[j][i]) (tmp, target));
27906 /* Put val in tmp at elt. */
27907 ix86_expand_vector_set (false, tmp, val, elt);
27910 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
27919 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27920 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27921 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27925 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27927 emit_move_insn (mem, target);
27929 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27930 emit_move_insn (tmp, val);
27932 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into the scalar TARGET.  When the
   ISA has a usable extract pattern, USE_VEC_EXTR selects a direct
   vec_select; otherwise the element is moved into lane 0 via shuffles,
   or as a last resort spilled through a stack temporary.
   NOTE(review): interior lines (mode switch labels, some branch bodies)
   are elided in this extract — the dispatch structure is incomplete.  */
27937 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27939 enum machine_mode mode = GET_MODE (vec);
27940 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27941 bool use_vec_extr = false;
27954 use_vec_extr = true;
27958 use_vec_extr = TARGET_SSE4_1;
/* V4SF path: broadcast element ELT into every lane with shufps.  */
27970 tmp = gen_reg_rtx (mode);
27971 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27972 GEN_INT (elt), GEN_INT (elt),
27973 GEN_INT (elt+4), GEN_INT (elt+4)));
27977 tmp = gen_reg_rtx (mode);
27978 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
27982 gcc_unreachable ();
27985 use_vec_extr = true;
27990 use_vec_extr = TARGET_SSE4_1;
/* V4SI path: replicate element ELT into all lanes with pshufd.  */
28004 tmp = gen_reg_rtx (mode);
28005 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28006 GEN_INT (elt), GEN_INT (elt),
28007 GEN_INT (elt), GEN_INT (elt)));
28011 tmp = gen_reg_rtx (mode);
28012 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28016 gcc_unreachable ();
28019 use_vec_extr = true;
28024 /* For SSE1, we have to reuse the V4SF code. */
28025 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28026 gen_lowpart (V4SFmode, vec), elt);
28032 use_vec_extr = TARGET_SSE2;
28035 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28039 use_vec_extr = TARGET_SSE4_1;
28043 /* ??? Could extract the appropriate HImode element and shift. */
/* Direct extraction: build (vec_select VEC (parallel [ELT])).  */
28050 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28051 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28053 /* Let the rtl optimizers know about the zero extension performed. */
28054 if (inner_mode == QImode || inner_mode == HImode)
28056 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28057 target = gen_lowpart (SImode, target);
28060 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill the vector to the stack and load one element.  */
28064 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28066 emit_move_insn (mem, vec);
28068 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28069 emit_move_insn (target, tmp);
28073 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28074 pattern to reduce; DEST is the destination; IN is the input vector. */
28077 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28079 rtx tmp1, tmp2, tmp3;
28081 tmp1 = gen_reg_rtx (V4SFmode);
28082 tmp2 = gen_reg_rtx (V4SFmode);
28083 tmp3 = gen_reg_rtx (V4SFmode);
/* movhlps: bring the high two lanes of IN down, then combine with the
   low two lanes via FN — reduces 4 values to 2 partial results.  */
28085 emit_insn (gen_sse_movhlps (tmp1, in, in));
28086 emit_insn (fn (tmp2, tmp1, in));
/* shufps: broadcast lane 1 of the partial result so the final FN
   combines the two remaining partials into lane 0 of DEST.  */
28088 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28089 GEN_INT (1), GEN_INT (1),
28090 GEN_INT (1+4), GEN_INT (1+4)));
28091 emit_insn (fn (dest, tmp2, tmp3));
28094 /* Target hook for scalar_mode_supported_p. */
28096 ix86_scalar_mode_supported_p (enum machine_mode mode)
/* Decimal float and TFmode get dedicated answers (the return values of
   these two branches are elided in this extract); everything else
   defers to the generic hook.  */
28098 if (DECIMAL_FLOAT_MODE_P (mode))
28100 else if (mode == TFmode)
28103 return default_scalar_mode_supported_p (mode);
28106 /* Implements target hook vector_mode_supported_p. */
28108 ix86_vector_mode_supported_p (enum machine_mode mode)
/* A vector mode is supported if any enabled ISA extension can hold it
   in registers; each test pairs an ISA flag with its mode validator.
   (The return statements between these tests are elided here.)  */
28110 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28112 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28114 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28116 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28118 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28123 /* Target hook for c_mode_for_suffix. */
/* Maps a C constant suffix character to a machine mode; the body
   (the suffix dispatch) is elided in this extract.  */
28124 static enum machine_mode
28125 ix86_c_mode_for_suffix (char suffix)
28135 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28137 We do this in the new i386 backend to maintain source compatibility
28138 with the old cc0-based compiler. */
28141 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28142 tree inputs ATTRIBUTE_UNUSED,
/* Implicitly clobber the condition-code "flags" and x87 "fpsr"
   registers for every asm statement.  */
28145 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28147 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28152 /* Implements target vector targetm.asm.encode_section_info. This
28153 is not used by netware. */
28155 static void ATTRIBUTE_UNUSED
28156 ix86_encode_section_info (tree decl, rtx rtl, int first)
28158 default_encode_section_info (decl, rtl, first);
/* Static/external variables placed in the large data section need
   far (64-bit) addressing; mark the symbol accordingly.  */
28160 if (TREE_CODE (decl) == VAR_DECL
28161 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28162 && ix86_in_large_data_p (decl))
28163 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28166 /* Worker function for REVERSE_CONDITION. */
28169 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
/* FP compare modes must preserve unordered-operand semantics when the
   condition is reversed; integer modes use the plain reversal.  */
28171 return (mode != CCFPmode && mode != CCFPUmode
28172 ? reverse_condition (code)
28173 : reverse_condition_maybe_unordered (code));
28176 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template for an x87 reg/mem move.  A REG_DEAD
   note on the source means the stack slot can be popped (fstp);
   otherwise a non-popping form is used.  */
28180 output_387_reg_move (rtx insn, rtx *operands)
28182 if (REG_P (operands[0]))
28184 if (REG_P (operands[1])
28185 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28187 if (REGNO (operands[0]) == FIRST_STACK_REG)
28188 return output_387_ffreep (operands, 0);
28189 return "fstp\t%y0";
28191 if (STACK_TOP_P (operands[0]))
28192 return "fld%z1\t%y1";
28195 else if (MEM_P (operands[0]))
28197 gcc_assert (REG_P (operands[1]));
28198 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28199 return "fstp%z0\t%y0";
28202 /* There is no non-popping store to memory for XFmode.
28203 So if we need one, follow the store with a load. */
28204 if (GET_MODE (operands[0]) == XFmode)
28205 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28207 return "fst%z0\t%y0";
28214 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28215 FP status register is set. */
28218 ix86_emit_fp_unordered_jump (rtx label)
28220 rtx reg = gen_reg_rtx (HImode);
/* fnstsw: copy the x87 status word into REG.  */
28223 emit_insn (gen_x86_fnstsw_1 (reg));
/* Fast path: sahf moves the status into EFLAGS and we branch on
   the UNORDERED condition.  */
28225 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28227 emit_insn (gen_x86_sahf_1 (reg));
28229 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28230 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Slow path: test the C2 bit (0x04) of the status word directly.  */
28234 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28236 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28237 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28240 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28241 gen_rtx_LABEL_REF (VOIDmode, label),
28243 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28245 emit_jump_insn (temp);
/* The unordered case is rare: predict the branch mostly not taken.  */
28246 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28249 /* Output code to perform a log1p XFmode calculation. */
28251 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28253 rtx label1 = gen_label_rtx ();
28254 rtx label2 = gen_label_rtx ();
28256 rtx tmp = gen_reg_rtx (XFmode);
28257 rtx tmp2 = gen_reg_rtx (XFmode);
/* Compare |op1| against 1 - sqrt(0.5): below that threshold fyl2xp1
   is accurate directly; otherwise fall through to label1 and compute
   ln2 * log2(1 + op1) with an explicit add.  */
28259 emit_insn (gen_absxf2 (tmp, op1));
28260 emit_insn (gen_cmpxf (tmp,
28261 CONST_DOUBLE_FROM_REAL_VALUE (
28262 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28264 emit_jump_insn (gen_bge (label1));
28266 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28267 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28268 emit_jump (label2);
28270 emit_label (label1);
/* Large-argument path: op0 = ln2 * log2(op1 + 1.0) via fyl2x.  */
28271 emit_move_insn (tmp, CONST1_RTX (XFmode));
28272 emit_insn (gen_addxf3 (tmp, op1, tmp));
28273 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28274 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28276 emit_label (label2);
28279 /* Output code to perform a Newton-Rhapson approximation of a single precision
28280 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28282 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28284 rtx x0, x1, e0, e1, two;
28286 x0 = gen_reg_rtx (mode);
28287 e0 = gen_reg_rtx (mode);
28288 e1 = gen_reg_rtx (mode);
28289 x1 = gen_reg_rtx (mode);
28291 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
/* For vector modes, splat the constant 2.0 across all lanes.  */
28293 if (VECTOR_MODE_P (mode))
28294 two = ix86_build_const_vector (SFmode, true, two);
28296 two = force_reg (mode, two);
28298 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28300 /* x0 = rcp(b) estimate */
28301 emit_insn (gen_rtx_SET (VOIDmode, x0,
28302 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 — should be close to 1.0 for a good estimate.  */
28305 emit_insn (gen_rtx_SET (VOIDmode, e0,
28306 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 — Newton correction factor.  */
28308 emit_insn (gen_rtx_SET (VOIDmode, e1,
28309 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1 — refined reciprocal of b.  */
28311 emit_insn (gen_rtx_SET (VOIDmode, x1,
28312 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 ~= a / b.  */
28314 emit_insn (gen_rtx_SET (VOIDmode, res,
28315 gen_rtx_MULT (mode, a, x1)));
28318 /* Output code to perform a Newton-Rhapson approximation of a
28319 single precision floating point [reciprocal] square root. */
28321 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28324 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28327 x0 = gen_reg_rtx (mode);
28328 e0 = gen_reg_rtx (mode);
28329 e1 = gen_reg_rtx (mode);
28330 e2 = gen_reg_rtx (mode);
28331 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the iteration.  */
28333 real_from_integer (&r, VOIDmode, -3, -1, 0);
28334 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28336 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28337 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28339 if (VECTOR_MODE_P (mode))
28341 mthree = ix86_build_const_vector (SFmode, true, mthree);
28342 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28345 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28346 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28348 /* x0 = rsqrt(a) estimate */
28349 emit_insn (gen_rtx_SET (VOIDmode, x0,
28350 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28353 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
28358 zero = gen_reg_rtx (mode);
28359 mask = gen_reg_rtx (mode);
/* mask = (a != 0) — all-ones where a is nonzero; AND it into x0 so
   that rsqrt(0) = inf becomes 0 and sqrt(0.0) stays 0.  */
28361 zero = force_reg (mode, CONST0_RTX(mode));
28362 emit_insn (gen_rtx_SET (VOIDmode, mask,
28363 gen_rtx_NE (mode, zero, a)));
28365 emit_insn (gen_rtx_SET (VOIDmode, x0,
28366 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a; e1 = e0 * x0 = a * x0^2 (should approach 1.0).  */
28370 emit_insn (gen_rtx_SET (VOIDmode, e0,
28371 gen_rtx_MULT (mode, x0, a)));
28373 emit_insn (gen_rtx_SET (VOIDmode, e1,
28374 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 (adding the constant -3.0).  */
28377 mthree = force_reg (mode, mthree);
28378 emit_insn (gen_rtx_SET (VOIDmode, e2,
28379 gen_rtx_PLUS (mode, e1, mthree)));
28381 mhalf = force_reg (mode, mhalf);
/* Only one of the two e3 assignments below is emitted per call —
   the selecting condition (sqrt vs. rsqrt) is elided in this extract.  */
28383 /* e3 = -.5 * x0 */
28384 emit_insn (gen_rtx_SET (VOIDmode, e3,
28385 gen_rtx_MULT (mode, x0, mhalf)));
28387 /* e3 = -.5 * e0 */
28388 emit_insn (gen_rtx_SET (VOIDmode, e3,
28389 gen_rtx_MULT (mode, e0, mhalf)));
28390 /* ret = e2 * e3 */
28391 emit_insn (gen_rtx_SET (VOIDmode, res,
28392 gen_rtx_MULT (mode, e2, e3)));
28395 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
28397 static void ATTRIBUTE_UNUSED
28398 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28401 /* With Binutils 2.15, the "@unwind" marker must be specified on
28402 every occurrence of the ".eh_frame" section, not just the first
/* Emit the @unwind-marked section directive for .eh_frame; all other
   sections fall through to the generic ELF handler.  */
28405 && strcmp (name, ".eh_frame") == 0)
28407 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28408 flags & SECTION_WRITE ? "aw" : "a");
28411 default_elf_asm_named_section (name, flags, decl);
28414 /* Return the mangling of TYPE if it is an extended fundamental type. */
28416 static const char *
28417 ix86_mangle_type (const_tree type)
28419 type = TYPE_MAIN_VARIANT (type);
/* Only fundamental scalar types can have a special mangling; the
   return for other tree codes is elided in this extract.  */
28421 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28422 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28425 switch (TYPE_MODE (type))
28428 /* __float128 is "g". */
28431 /* "long double" or __float80 is "e". */
28438 /* For 32-bit code we can save PIC register setup by using
28439 __stack_chk_fail_local hidden function instead of calling
28440 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
28441 register, so it is better to call __stack_chk_fail directly. */
28444 ix86_stack_protect_fail (void)
28446 return TARGET_64BIT
28447 ? default_external_stack_protect_fail ()
28448 : default_hidden_stack_protect_fail ();
28451 /* Select a format to encode pointers in exception handling data. CODE
28452 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28453 true if the symbol may be affected by dynamic relocations.
28455 ??? All x86 object file formats are capable of representing this.
28456 After all, the relocation needed is the same as for the call insn.
28457 Whether or not a particular assembler allows us to enter such, I
28458 guess we'll have to see. */
28460 asm_preferred_eh_data_format (int code, int global)
/* PIC path (condition line elided): pc-relative sdata, 4-byte when the
   code model guarantees small offsets, 8-byte otherwise; indirect for
   symbols subject to dynamic relocation.  */
28464 int type = DW_EH_PE_sdata8;
28466 || ix86_cmodel == CM_SMALL_PIC
28467 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28468 type = DW_EH_PE_sdata4;
28469 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, shrunk to udata4 when the code model
   keeps addresses in the low 4GB.  */
28471 if (ix86_cmodel == CM_SMALL
28472 || (ix86_cmodel == CM_MEDIUM && code))
28473 return DW_EH_PE_udata4;
28474 return DW_EH_PE_absptr;
28477 /* Expand copysign from SIGN to the positive value ABS_VALUE
28478 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
28481 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28483 enum machine_mode mode = GET_MODE (sign);
28484 rtx sgn = gen_reg_rtx (mode);
28485 if (mask == NULL_RTX)
28487 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28488 if (!VECTOR_MODE_P (mode))
28490 /* We need to generate a scalar mode mask in this case. */
28491 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28492 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28493 mask = gen_reg_rtx (mode);
28494 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-supplied MASK has the sign bit cleared, so invert it before
   extracting the sign bit from SIGN.  */
28498 mask = gen_rtx_NOT (mode, mask);
/* sgn = sign bit of SIGN; result = ABS_VALUE | sgn.  */
28499 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28500 gen_rtx_AND (mode, mask, sign)));
28501 emit_insn (gen_rtx_SET (VOIDmode, result,
28502 gen_rtx_IOR (mode, abs_value, sgn)));
28505 /* Expand fabs (OP0) and return a new rtx that holds the result. The
28506 mask for masking out the sign-bit is stored in *SMASK, if that is
28509 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28511 enum machine_mode mode = GET_MODE (op0);
28514 xa = gen_reg_rtx (mode);
/* Build an all-bits-except-sign mask; invert=true clears the sign.  */
28515 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28516 if (!VECTOR_MODE_P (mode))
28518 /* We need to generate a scalar mode mask in this case. */
28519 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28520 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28521 mask = gen_reg_rtx (mode);
28522 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & mask — clears the sign bit, i.e. fabs.  */
28524 emit_insn (gen_rtx_SET (VOIDmode, xa,
28525 gen_rtx_AND (mode, op0, mask)));
28533 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28534 swapping the operands if SWAP_OPERANDS is true. The expanded
28535 code is a forward jump to a newly created label in case the
28536 comparison is true. The generated label rtx is returned. */
28538 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28539 bool swap_operands)
/* Emit an unordered FP compare (CCFPUmode) and a conditional jump to
   the fresh label; caller emits the label at the join point.  */
28550 label = gen_label_rtx ();
28551 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28552 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28553 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28554 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28555 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28556 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28557 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28558 JUMP_LABEL (tmp) = label;
28563 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28564 using comparison code CODE. Operands are swapped for the comparison if
28565 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
28567 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28568 bool swap_operands)
28570 enum machine_mode mode = GET_MODE (op0);
28571 rtx mask = gen_reg_rtx (mode);
/* cmpsd for doubles, cmpss for floats; result is an all-ones /
   all-zeros bitmask per the comparison.  */
28580 if (mode == DFmode)
28581 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28582 gen_rtx_fmt_ee (code, mode, op0, op1)));
28584 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28585 gen_rtx_fmt_ee (code, mode, op0, op1)));
28590 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28591 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
28593 ix86_gen_TWO52 (enum machine_mode mode)
28595 REAL_VALUE_TYPE TWO52r;
/* 2^52 for double (52 mantissa bits), 2^23 for float: adding then
   subtracting this constant rounds to integer in SSE rounding code.  */
28598 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28599 TWO52 = const_double_from_real_value (TWO52r, mode);
28600 TWO52 = force_reg (mode, TWO52);
28605 /* Expand SSE sequence for computing lround from OP1 storing
28608 ix86_expand_lround (rtx op0, rtx op1)
28610 /* C code for the stuff we're doing below:
28611 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28614 enum machine_mode mode = GET_MODE (op1);
28615 const struct real_format *fmt;
28616 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28619 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2^(-p-1), the largest value below 0.5 in MODE —
   avoids rounding x.5-epsilon upward incorrectly.  */
28620 fmt = REAL_MODE_FORMAT (mode);
28621 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28622 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28624 /* adj = copysign (0.5, op1) */
28625 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28626 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28628 /* adj = op1 + adj */
28629 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28631 /* op0 = (imode)adj */
28632 expand_fix (op0, adj, 0);
28635 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
28638 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28640 /* C code for the stuff we're doing below (for do_floor):
28642 xi -= (double)xi > op1 ? 1 : 0;
28645 enum machine_mode fmode = GET_MODE (op1);
28646 enum machine_mode imode = GET_MODE (op0);
28647 rtx ireg, freg, label, tmp;
28649 /* reg = (long)op1 */
28650 ireg = gen_reg_rtx (imode);
28651 expand_fix (ireg, op1, 0);
28653 /* freg = (double)reg */
28654 freg = gen_reg_rtx (fmode);
28655 expand_float (freg, ireg, 0);
28657 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* UNLE jumps over the adjust when no correction is needed; operands
   are swapped for the ceil direction.  */
28658 label = ix86_expand_sse_compare_and_jump (UNLE,
28659 freg, op1, !do_floor);
28660 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28661 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28662 emit_move_insn (ireg, tmp);
28664 emit_label (label);
28665 LABEL_NUSES (label) = 1;
28667 emit_move_insn (op0, ireg);
28670 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28671 result in OPERAND0. */
28673 ix86_expand_rint (rtx operand0, rtx operand1)
28675 /* C code for the stuff we're doing below:
28676 xa = fabs (operand1);
28677 if (!isless (xa, 2**52))
28679 xa = xa + 2**52 - 2**52;
28680 return copysign (xa, operand1);
28682 enum machine_mode mode = GET_MODE (operand0);
28683 rtx res, xa, label, TWO52, mask;
28685 res = gen_reg_rtx (mode);
28686 emit_move_insn (res, operand1);
28688 /* xa = abs (operand1) */
28689 xa = ix86_expand_sse_fabs (res, &mask);
28691 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2^52 (or NaN) are already integral; skip the round.  */
28692 TWO52 = ix86_gen_TWO52 (mode);
28693 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding then subtracting 2^52 forces round-to-nearest-integer in the
   current FP rounding mode.  */
28695 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28696 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28698 ix86_sse_copysign_to_positive (res, xa, res, mask);
28700 emit_label (label);
28701 LABEL_NUSES (label) = 1;
28703 emit_move_insn (operand0, res);
28706 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28709 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28711 /* C code for the stuff we expand below.
28712 double xa = fabs (x), x2;
28713 if (!isless (xa, TWO52))
28715 xa = xa + TWO52 - TWO52;
28716 x2 = copysign (xa, x);
28725 enum machine_mode mode = GET_MODE (operand0);
28726 rtx xa, TWO52, tmp, label, one, res, mask;
28728 TWO52 = ix86_gen_TWO52 (mode);
28730 /* Temporary for holding the result, initialized to the input
28731 operand to ease control flow. */
28732 res = gen_reg_rtx (mode);
28733 emit_move_insn (res, operand1);
28735 /* xa = abs (operand1) */
28736 xa = ix86_expand_sse_fabs (res, &mask);
28738 /* if (!isless (xa, TWO52)) goto label; */
28739 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28741 /* xa = xa + TWO52 - TWO52; */
28742 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28743 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28745 /* xa = copysign (xa, operand1) */
28746 ix86_sse_copysign_to_positive (xa, xa, res, mask);
28748 /* generate 1.0 or -1.0 */
28749 one = force_reg (mode,
28750 const_double_from_real_value (do_floor
28751 ? dconst1 : dconstm1, mode));
28753 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* The compare mask selects +/-1.0, which is then subtracted; always
   subtracting (with sign folded into ONE) preserves signed zero.  */
28754 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28755 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28756 gen_rtx_AND (mode, one, tmp)));
28757 /* We always need to subtract here to preserve signed zero. */
28758 tmp = expand_simple_binop (mode, MINUS,
28759 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28760 emit_move_insn (res, tmp);
28762 emit_label (label);
28763 LABEL_NUSES (label) = 1;
28765 emit_move_insn (operand0, res);
28768 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28771 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28773 /* C code for the stuff we expand below.
28774 double xa = fabs (x), x2;
28775 if (!isless (xa, TWO52))
28777 x2 = (double)(long)x;
28784 if (HONOR_SIGNED_ZEROS (mode))
28785 return copysign (x2, x);
28788 enum machine_mode mode = GET_MODE (operand0);
28789 rtx xa, xi, TWO52, tmp, label, one, res, mask;
28791 TWO52 = ix86_gen_TWO52 (mode);
28793 /* Temporary for holding the result, initialized to the input
28794 operand to ease control flow. */
28795 res = gen_reg_rtx (mode);
28796 emit_move_insn (res, operand1);
28798 /* xa = abs (operand1) */
28799 xa = ix86_expand_sse_fabs (res, &mask);
28801 /* if (!isless (xa, TWO52)) goto label; */
28802 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28804 /* xa = (double)(long)x */
/* Truncate via the integer domain (cvttsd2si / cvtsi2sd round trip);
   this variant relies on DImode truncation for doubles.  */
28805 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28806 expand_fix (xi, res, 0);
28807 expand_float (xa, xi, 0);
28810 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28812 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28813 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28814 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28815 gen_rtx_AND (mode, one, tmp)));
28816 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28817 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28818 emit_move_insn (res, tmp);
/* Restore the original sign so floor(-0.0) == -0.0 when required.  */
28820 if (HONOR_SIGNED_ZEROS (mode))
28821 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28823 emit_label (label);
28824 LABEL_NUSES (label) = 1;
28826 emit_move_insn (operand0, res);
28829 /* Expand SSE sequence for computing round from OPERAND1 storing
28830 into OPERAND0. Sequence that works without relying on DImode truncation
28831 via cvttsd2siq that is only available on 64bit targets. */
28833 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28835 /* C code for the stuff we expand below.
28836 double xa = fabs (x), xa2, x2;
28837 if (!isless (xa, TWO52))
28839 Using the absolute value and copying back sign makes
28840 -0.0 -> -0.0 correct.
28841 xa2 = xa + TWO52 - TWO52;
28846 else if (dxa > 0.5)
28848 x2 = copysign (xa2, x);
28851 enum machine_mode mode = GET_MODE (operand0);
28852 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28854 TWO52 = ix86_gen_TWO52 (mode);
28856 /* Temporary for holding the result, initialized to the input
28857 operand to ease control flow. */
28858 res = gen_reg_rtx (mode);
28859 emit_move_insn (res, operand1);
28861 /* xa = abs (operand1) */
28862 xa = ix86_expand_sse_fabs (res, &mask);
28864 /* if (!isless (xa, TWO52)) goto label; */
28865 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28867 /* xa2 = xa + TWO52 - TWO52; */
28868 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28869 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28871 /* dxa = xa2 - xa; */
/* dxa is the rounding error of the 2^52 trick; it is used below to
   fix round-to-even back to round-half-away-from-zero.  */
28872 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28874 /* generate 0.5, 1.0 and -0.5 */
28875 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28876 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28877 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
28881 tmp = gen_reg_rtx (mode);
28882 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
28883 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28884 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28885 gen_rtx_AND (mode, one, tmp)));
28886 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28887 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28888 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28889 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28890 gen_rtx_AND (mode, one, tmp)));
28891 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28893 /* res = copysign (xa2, operand1) */
28894 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
28896 emit_label (label);
28897 LABEL_NUSES (label) = 1;
28899 emit_move_insn (operand0, res);
28902 /* Expand SSE sequence for computing trunc from OPERAND1 storing
28905 ix86_expand_trunc (rtx operand0, rtx operand1)
28907 /* C code for SSE variant we expand below.
28908 double xa = fabs (x), x2;
28909 if (!isless (xa, TWO52))
28911 x2 = (double)(long)x;
28912 if (HONOR_SIGNED_ZEROS (mode))
28913 return copysign (x2, x);
28916 enum machine_mode mode = GET_MODE (operand0);
28917 rtx xa, xi, TWO52, label, res, mask;
28919 TWO52 = ix86_gen_TWO52 (mode);
28921 /* Temporary for holding the result, initialized to the input
28922 operand to ease control flow. */
28923 res = gen_reg_rtx (mode);
28924 emit_move_insn (res, operand1);
28926 /* xa = abs (operand1) */
28927 xa = ix86_expand_sse_fabs (res, &mask);
28929 /* if (!isless (xa, TWO52)) goto label; */
28930 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28932 /* x = (double)(long)x */
/* fix/float round trip truncates toward zero by definition.  */
28933 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28934 expand_fix (xi, res, 0);
28935 expand_float (res, xi, 0);
28937 if (HONOR_SIGNED_ZEROS (mode))
28938 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28940 emit_label (label);
28941 LABEL_NUSES (label) = 1;
28943 emit_move_insn (operand0, res);
28946 /* Expand SSE sequence for computing trunc from OPERAND1 storing
28949 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
28951 enum machine_mode mode = GET_MODE (operand0);
28952 rtx xa, mask, TWO52, label, one, res, smask, tmp;
28954 /* C code for SSE variant we expand below.
28955 double xa = fabs (x), x2;
28956 if (!isless (xa, TWO52))
28958 xa2 = xa + TWO52 - TWO52;
28962 x2 = copysign (xa2, x);
28966 TWO52 = ix86_gen_TWO52 (mode);
28968 /* Temporary for holding the result, initialized to the input
28969 operand to ease control flow. */
28970 res = gen_reg_rtx (mode);
28971 emit_move_insn (res, operand1);
28973 /* xa = abs (operand1) */
28974 xa = ix86_expand_sse_fabs (res, &smask);
28976 /* if (!isless (xa, TWO52)) goto label; */
28977 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28979 /* res = xa + TWO52 - TWO52; */
/* 2^52 trick rounds to nearest; the compensation below converts that
   to truncation — no DImode conversion needed (works on 32-bit).  */
28980 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28981 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
28982 emit_move_insn (res, tmp);
28985 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28987 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
28988 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
28989 emit_insn (gen_rtx_SET (VOIDmode, mask,
28990 gen_rtx_AND (mode, mask, one)));
28991 tmp = expand_simple_binop (mode, MINUS,
28992 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
28993 emit_move_insn (res, tmp);
28995 /* res = copysign (res, operand1) */
28996 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
28998 emit_label (label);
28999 LABEL_NUSES (label) = 1;
29001 emit_move_insn (operand0, res);
29004 /* Expand SSE sequence for computing round from OPERAND1 storing
29007 ix86_expand_round (rtx operand0, rtx operand1)
29009 /* C code for the stuff we're doing below:
29010 double xa = fabs (x);
29011 if (!isless (xa, TWO52))
29013 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29014 return copysign (xa, x);
29016 enum machine_mode mode = GET_MODE (operand0);
29017 rtx res, TWO52, xa, label, xi, half, mask;
29018 const struct real_format *fmt;
29019 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29021 /* Temporary for holding the result, initialized to the input
29022 operand to ease control flow. */
29023 res = gen_reg_rtx (mode);
29024 emit_move_insn (res, operand1);
29026 TWO52 = ix86_gen_TWO52 (mode);
29027 xa = ix86_expand_sse_fabs (res, &mask);
29028 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29030 /* load nextafter (0.5, 0.0) */
/* Using the predecessor of 0.5 avoids rounding exact .5-epsilon
   values upward (same trick as ix86_expand_lround).  */
29031 fmt = REAL_MODE_FORMAT (mode);
29032 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29033 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29035 /* xa = xa + 0.5 */
29036 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29037 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29039 /* xa = (double)(int64_t)xa */
29040 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29041 expand_fix (xi, xa, 0);
29042 expand_float (xa, xi, 0);
29044 /* res = copysign (xa, operand1) */
29045 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29047 emit_label (label);
29048 LABEL_NUSES (label) = 1;
29050 emit_move_insn (operand0, res);
29054 /* Validate whether a SSE5 instruction is valid or not.
29055 OPERANDS is the array of operands.
29056 NUM is the number of operands.
29057 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29058 NUM_MEMORY is the maximum number of memory operands to accept.
29059 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29062 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29063 bool uses_oc0, int num_memory, bool commutative)
29069 /* Count the number of memory arguments */
/* mem_mask gets bit I set when operands[I] is a memory operand; the
   destination is operand 0.  */
29072 for (i = 0; i < num; i++)
29074 enum machine_mode mode = GET_MODE (operands[i]);
29075 if (register_operand (operands[i], mode))
29078 else if (memory_operand (operands[i], mode))
29080 mem_mask |= (1 << i);
29086 rtx pattern = PATTERN (insn);
29088 /* allow 0 for pcmov */
29089 if (GET_CODE (pattern) != SET
29090 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29092 || operands[i] != CONST0_RTX (mode))
29097 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29098 a memory operation. */
29099 if (num_memory < 0)
29101 num_memory = -num_memory;
29102 if ((mem_mask & (1 << (num-1))) != 0)
29104 mem_mask &= ~(1 << (num-1));
29109 /* If there were no memory operations, allow the insn */
29113 /* Do not allow the destination register to be a memory operand. */
29114 else if (mem_mask & (1 << 0))
29117 /* If there are too many memory operations, disallow the instruction. While
29118 the hardware only allows 1 memory reference, before register allocation
29119 for some insns, we allow two memory operations sometimes in order to allow
29120 code like the following to be optimized:
29122 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29124 or similar cases that are vectorized into using the fmaddss
29126 else if (mem_count > num_memory)
29129 /* Don't allow more than one memory operation if not optimizing. */
29130 else if (mem_count > 1 && !optimize)
29133 else if (num == 4 && mem_count == 1)
29135 /* formats (destination is the first argument), example fmaddss:
29136 xmm1, xmm1, xmm2, xmm3/mem
29137 xmm1, xmm1, xmm2/mem, xmm3
29138 xmm1, xmm2, xmm3/mem, xmm1
29139 xmm1, xmm2/mem, xmm3, xmm1 */
29141 return ((mem_mask == (1 << 1))
29142 || (mem_mask == (1 << 2))
29143 || (mem_mask == (1 << 3)));
29145 /* format, example pmacsdd:
29146 xmm1, xmm2, xmm3/mem, xmm1 */
29148 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29150 return (mem_mask == (1 << 2));
29153 else if (num == 4 && num_memory == 2)
29155 /* If there are two memory operations, we can load one of the memory ops
29156 into the destination register. This is for optimizing the
29157 multiply/add ops, which the combiner has optimized both the multiply
29158 and the add insns to have a memory operation. We have to be careful
29159 that the destination doesn't overlap with the inputs. */
29160 rtx op0 = operands[0];
29162 if (reg_mentioned_p (op0, operands[1])
29163 || reg_mentioned_p (op0, operands[2])
29164 || reg_mentioned_p (op0, operands[3]))
29167 /* formats (destination is the first argument), example fmaddss:
29168 xmm1, xmm1, xmm2, xmm3/mem
29169 xmm1, xmm1, xmm2/mem, xmm3
29170 xmm1, xmm2, xmm3/mem, xmm1
29171 xmm1, xmm2/mem, xmm3, xmm1
29173 For the oc0 case, we will load either operands[1] or operands[3] into
29174 operands[0], so any combination of 2 memory operands is ok. */
29178 /* format, example pmacsdd:
29179 xmm1, xmm2, xmm3/mem, xmm1
29181 For the integer multiply/add instructions be more restrictive and
29182 require operands[2] and operands[3] to be the memory operands. */
/* Fixed: the second alternative previously read
   "|| ((1 << 2) | (1 << 3))" -- a nonzero constant, so the condition was
   always true and any two-memory combination was accepted.  Both
   alternatives must compare MEM_MASK against the legal bit patterns.  */
29184 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29186 return (mem_mask == ((1 << 2) | (1 << 3)));
29189 else if (num == 3 && num_memory == 1)
29191 /* formats, example protb:
29192 xmm1, xmm2, xmm3/mem
29193 xmm1, xmm2/mem, xmm3 */
29195 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29197 /* format, example comeq:
29198 xmm1, xmm2, xmm3/mem */
29200 return (mem_mask == (1 << 2));
/* All (num, num_memory) combinations must be handled above.  */
29204 gcc_unreachable ();
29210 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29211 hardware will allow by using the destination register to load one of the
29212 memory operations. Presently this is used by the multiply/add routines to
29213 allow 2 memory references. */
29216 ix86_expand_sse5_multiple_memory (rtx operands[],
29218 enum machine_mode mode)
29220 rtx op0 = operands[0];
/* The destination must be a register that does not overlap any input;
   callers are expected to have checked this via ix86_sse5_valid_op_p.  */
29222 || memory_operand (op0, mode)
29223 || reg_mentioned_p (op0, operands[1])
29224 || reg_mentioned_p (op0, operands[2])
29225 || reg_mentioned_p (op0, operands[3]))
29226 gcc_unreachable ();
29228 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29229 the destination register. */
29230 if (memory_operand (operands[1], mode))
29232 emit_move_insn (op0, operands[1]);
29235 else if (memory_operand (operands[3], mode))
29237 emit_move_insn (op0, operands[3]);
/* Neither candidate operand was a memory reference: the caller promised two
   memory inputs, so this is an internal inconsistency.  */
29241 gcc_unreachable ();
29247 /* Table of valid machine attributes. */
29248 static const struct attribute_spec ix86_attribute_table[] =
29250 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29251 /* Stdcall attribute says callee is responsible for popping arguments
29252 if they are not variable. */
29253 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29254 /* Fastcall attribute says callee is responsible for popping arguments
29255 if they are not variable. */
29256 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29257 /* Cdecl attribute says the callee is a normal C declaration */
29258 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29259 /* Regparm attribute specifies how many integer arguments are to be
29260 passed in registers. */
29261 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29262 /* Sseregparm attribute says we are using x86_64 calling conventions
29263 for FP arguments. */
29264 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29265 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* Attribute name comes from a runtime string so the option spelling and the
   attribute spelling stay in sync.  */
29266 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29267 false, true, true, ix86_handle_cconv_attribute },
29268 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29269 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29270 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29271 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29273 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29274 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29275 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29276 SUBTARGET_ATTRIBUTE_TABLE,
29278 /* ms_abi and sysv_abi calling convention function attributes. */
29279 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29280 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* NULL-name entry terminates the table.  */
29282 { NULL, 0, 0, false, false, false, NULL }
29285 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29287 x86_builtin_vectorization_cost (bool runtime_test)
29289 /* If the branch of the runtime test is taken - i.e. - the vectorized
29290 version is skipped - this incurs a misprediction cost (because the
29291 vectorized version is expected to be the fall-through). So we subtract
29292 the latency of a mispredicted branch from the costs that are incurred
29293 when the vectorized version is executed.
29295 TODO: The values in individual target tables have to be tuned or new
29296 fields may be needed. For eg. on K8, the default branch path is the
29297 not-taken path. If the taken path is predicted correctly, the minimum
29298 penalty of going down the taken-path is 1 cycle. If the taken-path is
29299 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Negative: this cost is a credit against the vectorized version's cost.
   NOTE(review): the RUNTIME_TEST guard and the non-test return value are
   elided in this view -- confirm against the full source.  */
29303 return (-(ix86_cost->cond_taken_branch_cost));
29309 /* This function returns the calling abi specific va_list type node.
29310 It returns the FNDECL specific va_list type. */
29313 ix86_fn_abi_va_list (tree fndecl)
/* Implements TARGET_FN_ABI_VA_LIST: the va_list type depends on the ABI of
   the function itself, not on the default target ABI.  */
29318 return va_list_type_node;
29319 gcc_assert (fndecl != NULL_TREE);
29320 abi = ix86_function_abi ((const_tree) fndecl);
/* MS ABI functions use the MS va_list; everything else the SysV one.
   NOTE(review): the conditions guarding these returns are elided here.  */
29323 return ms_va_list_type_node;
29325 return sysv_va_list_type_node;
29328 /* Returns the canonical va_list type specified by TYPE. If there
29329 is no valid TYPE provided, it returns NULL_TREE. */
29332 ix86_canonical_va_list_type (tree type)
29336 /* Resolve references and pointers to va_list type. */
29337 if (INDIRECT_REF_P (type))
29338 type = TREE_TYPE (type);
29339 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29340 type = TREE_TYPE (type);
/* Try each known va_list builtin type in turn: the generic va_list, the
   SysV one, then the MS one.  HTYPE is presumably set to TYPE before these
   comparisons (initialization elided in this view -- confirm).  */
29344 wtype = va_list_type_node;
29345 gcc_assert (wtype != NULL_TREE);
29347 if (TREE_CODE (wtype) == ARRAY_TYPE)
29349 /* If va_list is an array type, the argument may have decayed
29350 to a pointer type, e.g. by being passed to another function.
29351 In that case, unwrap both types so that we can compare the
29352 underlying records. */
29353 if (TREE_CODE (htype) == ARRAY_TYPE
29354 || POINTER_TYPE_P (htype))
29356 wtype = TREE_TYPE (wtype);
29357 htype = TREE_TYPE (htype);
29360 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29361 return va_list_type_node;
/* Second candidate: the SysV-ABI va_list (same decay handling as above).  */
29362 wtype = sysv_va_list_type_node;
29363 gcc_assert (wtype != NULL_TREE);
29365 if (TREE_CODE (wtype) == ARRAY_TYPE)
29367 /* If va_list is an array type, the argument may have decayed
29368 to a pointer type, e.g. by being passed to another function.
29369 In that case, unwrap both types so that we can compare the
29370 underlying records. */
29371 if (TREE_CODE (htype) == ARRAY_TYPE
29372 || POINTER_TYPE_P (htype))
29374 wtype = TREE_TYPE (wtype);
29375 htype = TREE_TYPE (htype);
29378 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29379 return sysv_va_list_type_node;
/* Third candidate: the MS-ABI va_list.  */
29380 wtype = ms_va_list_type_node;
29381 gcc_assert (wtype != NULL_TREE);
29383 if (TREE_CODE (wtype) == ARRAY_TYPE)
29385 /* If va_list is an array type, the argument may have decayed
29386 to a pointer type, e.g. by being passed to another function.
29387 In that case, unwrap both types so that we can compare the
29388 underlying records. */
29389 if (TREE_CODE (htype) == ARRAY_TYPE
29390 || POINTER_TYPE_P (htype))
29392 wtype = TREE_TYPE (wtype);
29393 htype = TREE_TYPE (htype);
29396 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29397 return ms_va_list_type_node;
/* Not a target-specific va_list; fall back to the generic handling.  */
29400 return std_canonical_va_list_type (type);
29403 /* Iterate through the target-specific builtin types for va_list.
29404 IDX denotes the iterator, *PTREE is set to the result type of
29405 the va_list builtin, and *PNAME to its internal type.
29406 Returns zero if there is no element for this index, otherwise
29407 IDX should be increased upon the next call.
29408 Note, do not iterate a base builtin's name like __builtin_va_list.
29409 Used from c_common_nodes_and_builtins. */
29412 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Element 0: the MS-ABI va_list.  NOTE(review): the dispatch on IDX and the
   surrounding guards are elided in this view.  */
29418 *ptree = ms_va_list_type_node;
29419 *pname = "__builtin_ms_va_list";
/* Element 1: the SysV-ABI va_list.  */
29422 *ptree = sysv_va_list_type_node;
29423 *pname = "__builtin_sysv_va_list";
29431 /* Initialize the GCC target structure. */
29432 #undef TARGET_RETURN_IN_MEMORY
29433 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29435 #undef TARGET_ATTRIBUTE_TABLE
29436 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29437 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29438 # undef TARGET_MERGE_DECL_ATTRIBUTES
29439 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29442 #undef TARGET_COMP_TYPE_ATTRIBUTES
29443 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29445 #undef TARGET_INIT_BUILTINS
29446 #define TARGET_INIT_BUILTINS ix86_init_builtins
29447 #undef TARGET_EXPAND_BUILTIN
29448 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29450 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29451 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29452 ix86_builtin_vectorized_function
29454 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29455 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29457 #undef TARGET_BUILTIN_RECIPROCAL
29458 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29460 #undef TARGET_ASM_FUNCTION_EPILOGUE
29461 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29463 #undef TARGET_ENCODE_SECTION_INFO
29464 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29465 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29467 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29470 #undef TARGET_ASM_OPEN_PAREN
29471 #define TARGET_ASM_OPEN_PAREN ""
29472 #undef TARGET_ASM_CLOSE_PAREN
29473 #define TARGET_ASM_CLOSE_PAREN ""
29475 #undef TARGET_ASM_ALIGNED_HI_OP
29476 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29477 #undef TARGET_ASM_ALIGNED_SI_OP
29478 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29480 #undef TARGET_ASM_ALIGNED_DI_OP
29481 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29484 #undef TARGET_ASM_UNALIGNED_HI_OP
29485 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29486 #undef TARGET_ASM_UNALIGNED_SI_OP
29487 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29488 #undef TARGET_ASM_UNALIGNED_DI_OP
29489 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29491 #undef TARGET_SCHED_ADJUST_COST
29492 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29493 #undef TARGET_SCHED_ISSUE_RATE
29494 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29495 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29496 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29497 ia32_multipass_dfa_lookahead
29499 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29500 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29503 #undef TARGET_HAVE_TLS
29504 #define TARGET_HAVE_TLS true
29506 #undef TARGET_CANNOT_FORCE_CONST_MEM
29507 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29508 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29509 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29511 #undef TARGET_DELEGITIMIZE_ADDRESS
29512 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29514 #undef TARGET_MS_BITFIELD_LAYOUT_P
29515 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29518 #undef TARGET_BINDS_LOCAL_P
29519 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29521 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29522 #undef TARGET_BINDS_LOCAL_P
29523 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29526 #undef TARGET_ASM_OUTPUT_MI_THUNK
29527 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29528 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29529 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29531 #undef TARGET_ASM_FILE_START
29532 #define TARGET_ASM_FILE_START x86_file_start
29534 #undef TARGET_DEFAULT_TARGET_FLAGS
29535 #define TARGET_DEFAULT_TARGET_FLAGS \
29537 | TARGET_SUBTARGET_DEFAULT \
29538 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29540 #undef TARGET_HANDLE_OPTION
29541 #define TARGET_HANDLE_OPTION ix86_handle_option
29543 #undef TARGET_RTX_COSTS
29544 #define TARGET_RTX_COSTS ix86_rtx_costs
29545 #undef TARGET_ADDRESS_COST
29546 #define TARGET_ADDRESS_COST ix86_address_cost
29548 #undef TARGET_FIXED_CONDITION_CODE_REGS
29549 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29550 #undef TARGET_CC_MODES_COMPATIBLE
29551 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29553 #undef TARGET_MACHINE_DEPENDENT_REORG
29554 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29556 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
29557 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
29559 #undef TARGET_BUILD_BUILTIN_VA_LIST
29560 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29562 #undef TARGET_FN_ABI_VA_LIST
29563 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29565 #undef TARGET_CANONICAL_VA_LIST_TYPE
29566 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29568 #undef TARGET_EXPAND_BUILTIN_VA_START
29569 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29571 #undef TARGET_MD_ASM_CLOBBERS
29572 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29574 #undef TARGET_PROMOTE_PROTOTYPES
29575 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29576 #undef TARGET_STRUCT_VALUE_RTX
29577 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29578 #undef TARGET_SETUP_INCOMING_VARARGS
29579 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29580 #undef TARGET_MUST_PASS_IN_STACK
29581 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29582 #undef TARGET_PASS_BY_REFERENCE
29583 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29584 #undef TARGET_INTERNAL_ARG_POINTER
29585 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29586 #undef TARGET_UPDATE_STACK_BOUNDARY
29587 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29588 #undef TARGET_GET_DRAP_RTX
29589 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29590 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29591 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29592 #undef TARGET_STRICT_ARGUMENT_NAMING
29593 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29595 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29596 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29598 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29599 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29601 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29602 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29604 #undef TARGET_C_MODE_FOR_SUFFIX
29605 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29608 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29609 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29612 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29613 #undef TARGET_INSERT_ATTRIBUTES
29614 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
29617 #undef TARGET_MANGLE_TYPE
29618 #define TARGET_MANGLE_TYPE ix86_mangle_type
29620 #undef TARGET_STACK_PROTECT_FAIL
29621 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
29623 #undef TARGET_FUNCTION_VALUE
29624 #define TARGET_FUNCTION_VALUE ix86_function_value
29626 #undef TARGET_SECONDARY_RELOAD
29627 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
29629 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
29630 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
29632 #undef TARGET_SET_CURRENT_FUNCTION
29633 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
29635 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
29636 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
29638 #undef TARGET_OPTION_SAVE
29639 #define TARGET_OPTION_SAVE ix86_function_specific_save
29641 #undef TARGET_OPTION_RESTORE
29642 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
29644 #undef TARGET_OPTION_PRINT
29645 #define TARGET_OPTION_PRINT ix86_function_specific_print
29647 #undef TARGET_OPTION_CAN_INLINE_P
29648 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
29650 #undef TARGET_EXPAND_TO_RTL_HOOK
29651 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* The global target hook vector; TARGET_INITIALIZER picks up every
   TARGET_* macro overridden above, defaulting the rest.  */
29653 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collection root tables generated by gengtype for this file.  */
29655 #include "gt-i386.h"